In [4]:
import os
import sys

# Resolve the project's source directory relative to the notebook's working
# directory so that its packages can be imported by the cells below.
module_path = os.path.abspath('../../src')

# Register the directory only once: re-running this cell must not keep
# appending the same entry to the import search path.
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

# Pandas helper methods
Since pandas DataFrames are the de facto standard in data science, the API provides methods and properties for converting to and from pandas data structures.

## Parsing from pandas
### Series as an item

### Dataframes as a collection of items

In [5]:
import pandas as pd
# Location of the training data, relative to the notebook's working directory.
train_csv = "../../../Datasets/Temp/train.csv"

# Load the CSV into a DataFrame and leave it as the last expression so the
# notebook renders it as the cell's output.
frame = pd.read_csv(train_csv)
frame

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.0,7917,Pave,,Reg,Lvl,AllPub,...,0,,,,0,8,2007,WD,Normal,175000
1456,1457,20,RL,85.0,13175,Pave,,Reg,Lvl,AllPub,...,0,,MnPrv,,0,2,2010,WD,Normal,210000
1457,1458,70,RL,66.0,9042,Pave,,Reg,Lvl,AllPub,...,0,,GdPrv,Shed,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,...,0,,,,0,4,2010,WD,Normal,142125


In [6]:
import framework.parser.pandas_parser as parser
from random import choice

from framework.property import CategoricalProperty, NumericalProperty

# `choice` (imported above from `random`) draws from the module-level RNG.
# Import the module here so that RNG can be seeded before sampling; without a
# seed, the item printed at the end of this cell changes on every run.
import random

SEED = 0

# Column name -> property class used for that column when parsing the frame.
# Only the columns listed here are passed to the parser.
mapping = {
    "MSSubClass": CategoricalProperty,
    "MSZoning": CategoricalProperty,
    "LotFrontage": NumericalProperty,
    "LotArea": NumericalProperty,
    "Street": CategoricalProperty,
    "Alley": CategoricalProperty,
    "LotShape": CategoricalProperty,
    "LandContour": CategoricalProperty,
    "Utilities": CategoricalProperty,
    "LotConfig": CategoricalProperty,
    "LandSlope": CategoricalProperty,
    "Neighborhood": CategoricalProperty,
    "Condition1": CategoricalProperty,
    "Condition2": CategoricalProperty,
    "BldgType": CategoricalProperty,
    "HouseStyle": CategoricalProperty
}

# Parse the dataframe into a collection of items; the "Id" column is passed as
# the identifier column.
items = parser.parse_dataframe_as_items(frame, mapping, id_col="Id")
print(len(items))

# Show one sample item. Seeding first makes the sample reproducible under
# Restart Kernel -> Run All.
random.seed(SEED)
print(choice(items))

1460
<id:1001, properties:{'MSSubClass': '20', 'MSZoning': 'RL', 'LotFrontage': '74.0', 'LotArea': '10206.0', 'Street': 'Pave', 'Alley': 'nan', 'LotShape': 'Reg', 'LandContour': 'Lvl', 'Utilities': 'AllPub', 'LotConfig': 'Corner', 'LandSlope': 'Gtl', 'Neighborhood': 'Edwards', 'Condition1': 'Norm', 'Condition2': 'Norm', 'BldgType': '1Fam', 'HouseStyle': '1Story'}>


## Converting back to pandas
### Item as Series

In [7]:
# Take the first item of the collection parsed in the previous cell.
item = items[0]
# Convert that single item to a pandas structure via the item's own method.
# NOTE(review): presumably returns a pandas Series (the method name suggests
# it) — confirm against the framework's item class.
series = item.as_series()
# Printed explicitly so the plain-text representation is shown.
print(series)

MSSubClass           60
MSZoning             RL
LotFrontage        65.0
LotArea          8450.0
Street             Pave
Alley               nan
LotShape            Reg
LandContour         Lvl
Utilities        AllPub
LotConfig        Inside
LandSlope           Gtl
Neighborhood    CollgCr
Condition1         Norm
Condition2         Norm
BldgType           1Fam
HouseStyle       2Story
Name: 1, dtype: object


### Collection of items as dataframe

In [9]:
import framework.parser.pandas_parser as pd_parser

# Module-level helper for converting a single item; `item` comes from the
# "Item as Series" cell above.
# NOTE(review): presumably equivalent to `item.as_series()` — confirm.
other_way_to_create_item_series = pd_parser.item_as_series(item)
# Convert the whole collection of parsed items in one call.
other_frame = pd_parser.items_as_dataframe(items)
# Last expression of the cell, so the notebook renders it as the output.
other_frame

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle
1,60,RL,65.0,8450.0,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story
2,20,RL,80.0,9600.0,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story
3,60,RL,68.0,11250.0,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story
4,70,RL,60.0,9550.0,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story
5,60,RL,84.0,14260.0,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,60,RL,62.0,7917.0,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story
1457,20,RL,85.0,13175.0,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story
1458,70,RL,66.0,9042.0,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story
1459,20,RL,68.0,9717.0,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story
