diff --git a/.circleci/config.yml b/.circleci/config.yml index 037645ab2..acca739f4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,4 @@ -version: '2.1' +version: "2.1" orbs: node: circleci/node@5.1.0 @@ -48,7 +48,7 @@ jobs: - checkout: path: ~/project/ - node/install: - node-version: '16.13' + node-version: "16.13" - run: node --version - run: npm i -g @railway/cli - run: @@ -76,7 +76,6 @@ jobs: command: | tox -e publish_model - section_08_deploy_app_container_via_railway: <<: *defaults steps: @@ -86,7 +85,7 @@ jobs: - checkout: path: ~/project/ - node/install: - node-version: '16.13' + node-version: "16.13" - run: node --version - run: npm i -g @railway/cli - run: @@ -155,18 +154,18 @@ jobs: steps: - checkout - *prepare_venv - - run: + - run: name: Install requirements command: | . venv/bin/activate pip install -r packages/regression_model/requirements.txt - *fetch_data - - run: + - run: name: Train model command: | . venv/bin/activate PYTHONPATH=./packages/regression_model python3 packages/regression_model/regression_model/train_pipeline.py - - run: + - run: name: Publish model to Gemfury command: | . venv/bin/activate @@ -202,11 +201,11 @@ jobs: name: Setup Heroku CLI command: | wget -qO- https://cli-assets.heroku.com/install-ubuntu.sh | sh - - run: + - run: name: Build and Push Image command: | make build-ml-api-heroku push-ml-api-heroku - - run: + - run: name: Release to Heroku command: | heroku container:release web --app $HEROKU_APP_NAME @@ -232,7 +231,7 @@ jobs: steps: - checkout - *prepare_venv - - run: + - run: name: Install requirements command: | . venv/bin/activate @@ -243,19 +242,18 @@ jobs: . venv/bin/activate chmod +x ./scripts/fetch_kaggle_large_dataset.sh ./scripts/fetch_kaggle_large_dataset.sh - - run: + - run: name: Train model command: | . venv/bin/activate PYTHONPATH=./packages/neural_network_model python3 packages/neural_network_model/neural_network_model/train_pipeline.py - - run: + - run: name: Publish model to Gemfury command: | . venv/bin/activate chmod +x ./scripts/publish_model.sh ./scripts/publish_model.sh ./packages/neural_network_model/ - tags_only: &tags_only filters: branches: @@ -281,12 +279,11 @@ workflows: <<: *tags_only - section_08_deploy_app_container_via_railway: - filters: - branches: - only: - - master - - demo - + filters: + branches: + only: + - master + - demo # test-all: # jobs: # - test_regression_model_py36 diff --git a/section-04-research-and-development/01-machine-learning-pipeline-data-analysis.ipynb b/section-04-research-and-development/01-machine-learning-pipeline-data-analysis.ipynb index df3c3c9f1..c713a98fa 100644 --- a/section-04-research-and-development/01-machine-learning-pipeline-data-analysis.ipynb +++ b/section-04-research-and-development/01-machine-learning-pipeline-data-analysis.ipynb @@ -97,642 +97,26 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1460, 81)\n" - ] - }, - { - "data": { - "text/html": [ - "
| \n", - " | Id | \n", - "MSSubClass | \n", - "MSZoning | \n", - "LotFrontage | \n", - "LotArea | \n", - "Street | \n", - "Alley | \n", - "LotShape | \n", - "LandContour | \n", - "Utilities | \n", - "LotConfig | \n", - "LandSlope | \n", - "Neighborhood | \n", - "Condition1 | \n", - "Condition2 | \n", - "BldgType | \n", - "HouseStyle | \n", - "OverallQual | \n", - "OverallCond | \n", - "YearBuilt | \n", - "YearRemodAdd | \n", - "RoofStyle | \n", - "RoofMatl | \n", - "Exterior1st | \n", - "Exterior2nd | \n", - "MasVnrType | \n", - "MasVnrArea | \n", - "ExterQual | \n", - "ExterCond | \n", - "Foundation | \n", - "BsmtQual | \n", - "BsmtCond | \n", - "BsmtExposure | \n", - "BsmtFinType1 | \n", - "BsmtFinSF1 | \n", - "BsmtFinType2 | \n", - "BsmtFinSF2 | \n", - "BsmtUnfSF | \n", - "TotalBsmtSF | \n", - "Heating | \n", - "HeatingQC | \n", - "CentralAir | \n", - "Electrical | \n", - "1stFlrSF | \n", - "2ndFlrSF | \n", - "LowQualFinSF | \n", - "GrLivArea | \n", - "BsmtFullBath | \n", - "BsmtHalfBath | \n", - "FullBath | \n", - "HalfBath | \n", - "BedroomAbvGr | \n", - "KitchenAbvGr | \n", - "KitchenQual | \n", - "TotRmsAbvGrd | \n", - "Functional | \n", - "Fireplaces | \n", - "FireplaceQu | \n", - "GarageType | \n", - "GarageYrBlt | \n", - "GarageFinish | \n", - "GarageCars | \n", - "GarageArea | \n", - "GarageQual | \n", - "GarageCond | \n", - "PavedDrive | \n", - "WoodDeckSF | \n", - "OpenPorchSF | \n", - "EnclosedPorch | \n", - "3SsnPorch | \n", - "ScreenPorch | \n", - "PoolArea | \n", - "PoolQC | \n", - "Fence | \n", - "MiscFeature | \n", - "MiscVal | \n", - "MoSold | \n", - "YrSold | \n", - "SaleType | \n", - "SaleCondition | \n", - "SalePrice | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "1 | \n", - "60 | \n", - "RL | \n", - "65.0 | \n", - "8450 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "Inside | \n", - "Gtl | \n", - "CollgCr | \n", - "Norm | \n", - "Norm | \n", - "1Fam | \n", - "2Story | \n", - "7 | \n", - "5 | \n", - "2003 | \n", - "2003 | \n", - "Gable | \n", - "CompShg | \n", - "VinylSd | \n", - "VinylSd | \n", - "BrkFace | \n", - "196.0 | \n", - "Gd | \n", - "TA | \n", - "PConc | \n", - "Gd | \n", - "TA | \n", - "No | \n", - "GLQ | \n", - "706 | \n", - "Unf | \n", - "0 | \n", - "150 | \n", - "856 | \n", - "GasA | \n", - "Ex | \n", - "Y | \n", - "SBrkr | \n", - "856 | \n", - "854 | \n", - "0 | \n", - "1710 | \n", - "1 | \n", - "0 | \n", - "2 | \n", - "1 | \n", - "3 | \n", - "1 | \n", - "Gd | \n", - "8 | \n", - "Typ | \n", - "0 | \n", - "NaN | \n", - "Attchd | \n", - "2003.0 | \n", - "RFn | \n", - "2 | \n", - "548 | \n", - "TA | \n", - "TA | \n", - "Y | \n", - "0 | \n", - "61 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "2 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "208500 | \n", - "
| 1 | \n", - "2 | \n", - "20 | \n", - "RL | \n", - "80.0 | \n", - "9600 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "FR2 | \n", - "Gtl | \n", - "Veenker | \n", - "Feedr | \n", - "Norm | \n", - "1Fam | \n", - "1Story | \n", - "6 | \n", - "8 | \n", - "1976 | \n", - "1976 | \n", - "Gable | \n", - "CompShg | \n", - "MetalSd | \n", - "MetalSd | \n", - "None | \n", - "0.0 | \n", - "TA | \n", - "TA | \n", - "CBlock | \n", - "Gd | \n", - "TA | \n", - "Gd | \n", - "ALQ | \n", - "978 | \n", - "Unf | \n", - "0 | \n", - "284 | \n", - "1262 | \n", - "GasA | \n", - "Ex | \n", - "Y | \n", - "SBrkr | \n", - "1262 | \n", - "0 | \n", - "0 | \n", - "1262 | \n", - "0 | \n", - "1 | \n", - "2 | \n", - "0 | \n", - "3 | \n", - "1 | \n", - "TA | \n", - "6 | \n", - "Typ | \n", - "1 | \n", - "TA | \n", - "Attchd | \n", - "1976.0 | \n", - "RFn | \n", - "2 | \n", - "460 | \n", - "TA | \n", - "TA | \n", - "Y | \n", - "298 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "5 | \n", - "2007 | \n", - "WD | \n", - "Normal | \n", - "181500 | \n", - "
| 2 | \n", - "3 | \n", - "60 | \n", - "RL | \n", - "68.0 | \n", - "11250 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "Inside | \n", - "Gtl | \n", - "CollgCr | \n", - "Norm | \n", - "Norm | \n", - "1Fam | \n", - "2Story | \n", - "7 | \n", - "5 | \n", - "2001 | \n", - "2002 | \n", - "Gable | \n", - "CompShg | \n", - "VinylSd | \n", - "VinylSd | \n", - "BrkFace | \n", - "162.0 | \n", - "Gd | \n", - "TA | \n", - "PConc | \n", - "Gd | \n", - "TA | \n", - "Mn | \n", - "GLQ | \n", - "486 | \n", - "Unf | \n", - "0 | \n", - "434 | \n", - "920 | \n", - "GasA | \n", - "Ex | \n", - "Y | \n", - "SBrkr | \n", - "920 | \n", - "866 | \n", - "0 | \n", - "1786 | \n", - "1 | \n", - "0 | \n", - "2 | \n", - "1 | \n", - "3 | \n", - "1 | \n", - "Gd | \n", - "6 | \n", - "Typ | \n", - "1 | \n", - "TA | \n", - "Attchd | \n", - "2001.0 | \n", - "RFn | \n", - "2 | \n", - "608 | \n", - "TA | \n", - "TA | \n", - "Y | \n", - "0 | \n", - "42 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "9 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "223500 | \n", - "
| 3 | \n", - "4 | \n", - "70 | \n", - "RL | \n", - "60.0 | \n", - "9550 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "Corner | \n", - "Gtl | \n", - "Crawfor | \n", - "Norm | \n", - "Norm | \n", - "1Fam | \n", - "2Story | \n", - "7 | \n", - "5 | \n", - "1915 | \n", - "1970 | \n", - "Gable | \n", - "CompShg | \n", - "Wd Sdng | \n", - "Wd Shng | \n", - "None | \n", - "0.0 | \n", - "TA | \n", - "TA | \n", - "BrkTil | \n", - "TA | \n", - "Gd | \n", - "No | \n", - "ALQ | \n", - "216 | \n", - "Unf | \n", - "0 | \n", - "540 | \n", - "756 | \n", - "GasA | \n", - "Gd | \n", - "Y | \n", - "SBrkr | \n", - "961 | \n", - "756 | \n", - "0 | \n", - "1717 | \n", - "1 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "3 | \n", - "1 | \n", - "Gd | \n", - "7 | \n", - "Typ | \n", - "1 | \n", - "Gd | \n", - "Detchd | \n", - "1998.0 | \n", - "Unf | \n", - "3 | \n", - "642 | \n", - "TA | \n", - "TA | \n", - "Y | \n", - "0 | \n", - "35 | \n", - "272 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "2 | \n", - "2006 | \n", - "WD | \n", - "Abnorml | \n", - "140000 | \n", - "
| 4 | \n", - "5 | \n", - "60 | \n", - "RL | \n", - "84.0 | \n", - "14260 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "FR2 | \n", - "Gtl | \n", - "NoRidge | \n", - "Norm | \n", - "Norm | \n", - "1Fam | \n", - "2Story | \n", - "8 | \n", - "5 | \n", - "2000 | \n", - "2000 | \n", - "Gable | \n", - "CompShg | \n", - "VinylSd | \n", - "VinylSd | \n", - "BrkFace | \n", - "350.0 | \n", - "Gd | \n", - "TA | \n", - "PConc | \n", - "Gd | \n", - "TA | \n", - "Av | \n", - "GLQ | \n", - "655 | \n", - "Unf | \n", - "0 | \n", - "490 | \n", - "1145 | \n", - "GasA | \n", - "Ex | \n", - "Y | \n", - "SBrkr | \n", - "1145 | \n", - "1053 | \n", - "0 | \n", - "2198 | \n", - "1 | \n", - "0 | \n", - "2 | \n", - "1 | \n", - "4 | \n", - "1 | \n", - "Gd | \n", - "9 | \n", - "Typ | \n", - "1 | \n", - "TA | \n", - "Attchd | \n", - "2000.0 | \n", - "RFn | \n", - "3 | \n", - "836 | \n", - "TA | \n", - "TA | \n", - "Y | \n", - "192 | \n", - "84 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "12 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "250000 | \n", - "
Pipeline(steps=[('missing_imputation',\n",
+ " CategoricalImputer(variables=['FireplaceQu'])),\n",
+ " ('frequent_imputation',\n",
+ " CategoricalImputer(imputation_method='frequent',\n",
+ " variables=['BsmtQual', 'BsmtExposure',\n",
+ " 'BsmtFinType1',\n",
+ " 'GarageFinish'])),\n",
+ " ('missing_indicator',\n",
+ " AddMissingIndicator(variables=['LotFrontage'])),\n",
+ " ('mean_imputation',\n",
+ " MeanMedianImputer(imputation_method=...\n",
+ " 'Foundation', 'CentralAir',\n",
+ " 'Functional', 'PavedDrive',\n",
+ " 'SaleCondition'])),\n",
+ " ('categorical_encoder',\n",
+ " OrdinalEncoder(variables=['MSSubClass', 'MSZoning', 'LotShape',\n",
+ " 'LandContour', 'LotConfig',\n",
+ " 'Neighborhood', 'RoofStyle',\n",
+ " 'Exterior1st', 'Foundation',\n",
+ " 'CentralAir', 'Functional',\n",
+ " 'PavedDrive', 'SaleCondition'])),\n",
+ " ('scaler', MinMaxScaler()),\n",
+ " ('Lasso', Lasso(alpha=0.001, random_state=0))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Pipeline(steps=[('missing_imputation',\n",
+ " CategoricalImputer(variables=['FireplaceQu'])),\n",
+ " ('frequent_imputation',\n",
+ " CategoricalImputer(imputation_method='frequent',\n",
+ " variables=['BsmtQual', 'BsmtExposure',\n",
+ " 'BsmtFinType1',\n",
+ " 'GarageFinish'])),\n",
+ " ('missing_indicator',\n",
+ " AddMissingIndicator(variables=['LotFrontage'])),\n",
+ " ('mean_imputation',\n",
+ " MeanMedianImputer(imputation_method=...\n",
+ " 'Foundation', 'CentralAir',\n",
+ " 'Functional', 'PavedDrive',\n",
+ " 'SaleCondition'])),\n",
+ " ('categorical_encoder',\n",
+ " OrdinalEncoder(variables=['MSSubClass', 'MSZoning', 'LotShape',\n",
+ " 'LandContour', 'LotConfig',\n",
+ " 'Neighborhood', 'RoofStyle',\n",
+ " 'Exterior1st', 'Foundation',\n",
+ " 'CentralAir', 'Functional',\n",
+ " 'PavedDrive', 'SaleCondition'])),\n",
+ " ('scaler', MinMaxScaler()),\n",
+ " ('Lasso', Lasso(alpha=0.001, random_state=0))])CategoricalImputer(variables=['FireplaceQu'])
CategoricalImputer(imputation_method='frequent',\n", + " variables=['BsmtQual', 'BsmtExposure', 'BsmtFinType1',\n", + " 'GarageFinish'])
AddMissingIndicator(variables=['LotFrontage'])
MeanMedianImputer(imputation_method='mean', variables=['LotFrontage'])
TemporalVariableTransformer(reference_variable='YrSold',\n", + " variables=['YearRemodAdd'])
DropFeatures(features_to_drop=['YrSold'])
LogTransformer(variables=['LotFrontage', '1stFlrSF', 'GrLivArea'])
SklearnTransformerWrapper(transformer=Binarizer(threshold=0),\n", + " variables=['ScreenPorch'])
Binarizer(threshold=0)
Binarizer(threshold=0)
Mapper(mappings={'Ex': 5, 'Fa': 2, 'Gd': 4, 'Missing': 0, 'NA': 0, 'Po': 1,\n",
+ " 'TA': 3},\n",
+ " variables=['ExterQual', 'BsmtQual', 'HeatingQC', 'KitchenQual',\n",
+ " 'FireplaceQu'])Mapper(mappings={'Av': 3, 'Gd': 4, 'Mn': 2, 'No': 1},\n",
+ " variables=['BsmtExposure'])Mapper(mappings={'ALQ': 5, 'BLQ': 4, 'GLQ': 6, 'LwQ': 2, 'Missing': 0, 'NA': 0,\n",
+ " 'Rec': 3, 'Unf': 1},\n",
+ " variables=['BsmtFinType1'])Mapper(mappings={'Fin': 3, 'Missing': 0, 'NA': 0, 'RFn': 2, 'Unf': 1},\n",
+ " variables=['GarageFinish'])RareLabelEncoder(n_categories=1, tol=0.01,\n", + " variables=['MSSubClass', 'MSZoning', 'LotShape', 'LandContour',\n", + " 'LotConfig', 'Neighborhood', 'RoofStyle',\n", + " 'Exterior1st', 'Foundation', 'CentralAir',\n", + " 'Functional', 'PavedDrive', 'SaleCondition'])
OrdinalEncoder(variables=['MSSubClass', 'MSZoning', 'LotShape', 'LandContour',\n", + " 'LotConfig', 'Neighborhood', 'RoofStyle',\n", + " 'Exterior1st', 'Foundation', 'CentralAir',\n", + " 'Functional', 'PavedDrive', 'SaleCondition'])
MinMaxScaler()
Lasso(alpha=0.001, random_state=0)