diff --git a/lab-feature-engineering.ipynb b/lab-feature-engineering.ipynb new file mode 100644 index 0000000..9b542e7 --- /dev/null +++ b/lab-feature-engineering.ipynb @@ -0,0 +1,3104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 244, + "id": "e1aeb6d7", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import warnings\n", + "warnings.filterwarnings ('ignore')\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import seaborn as sns\n", + "\n", + "import statsmodels.api as sm\n", + "from statsmodels.formula.api import ols" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "id": "150bf899", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ODATEDWOSOURCETCODESTATEZIPMAILCODEPVASTATEDOBNOEXCHRECINHSE...TARGET_DHPHONE_DRFA_2RRFA_2FRFA_2AMDMAUD_RMDMAUD_FMDMAUD_ACLUSTER2GEOCODE2
08901GRI0IL6108137120...0.00L4EXXX39.0C
19401BOA1CA9132652020...0.00L2GXXX1.0A
29001AMH1NC2701700...0.01L4EXXX60.0C
38701BRY0CA9595328010...0.01L4EXXX41.0C
486010FL3317620010X...0.01L2FXXX26.0A
..................................................................
954079601ASE1AK9950400...0.00L1GXXX12.0C
954089601DCD1TX7737950010...0.01L1FXXX2.0A
954099501MBC1MI4891038010...0.01L3EXXX34.0B
954108601PRV0CA9132040050X...18.01L4FXXX11.0A
954118801MCC2NC2840918010X...0.01L1GC1C12.0C
\n", + "

95412 rows × 481 columns

\n", + "
" + ], + "text/plain": [ + " ODATEDW OSOURCE TCODE STATE ZIP MAILCODE PVASTATE DOB NOEXCH \\\n", + "0 8901 GRI 0 IL 61081 3712 0 \n", + "1 9401 BOA 1 CA 91326 5202 0 \n", + "2 9001 AMH 1 NC 27017 0 0 \n", + "3 8701 BRY 0 CA 95953 2801 0 \n", + "4 8601 0 FL 33176 2001 0 \n", + "... ... ... ... ... ... ... ... ... ... \n", + "95407 9601 ASE 1 AK 99504 0 0 \n", + "95408 9601 DCD 1 TX 77379 5001 0 \n", + "95409 9501 MBC 1 MI 48910 3801 0 \n", + "95410 8601 PRV 0 CA 91320 4005 0 \n", + "95411 8801 MCC 2 NC 28409 1801 0 \n", + "\n", + " RECINHSE ... TARGET_D HPHONE_D RFA_2R RFA_2F RFA_2A MDMAUD_R MDMAUD_F \\\n", + "0 ... 0.0 0 L 4 E X X \n", + "1 ... 0.0 0 L 2 G X X \n", + "2 ... 0.0 1 L 4 E X X \n", + "3 ... 0.0 1 L 4 E X X \n", + "4 X ... 0.0 1 L 2 F X X \n", + "... ... ... ... ... ... ... ... ... ... \n", + "95407 ... 0.0 0 L 1 G X X \n", + "95408 ... 0.0 1 L 1 F X X \n", + "95409 ... 0.0 1 L 3 E X X \n", + "95410 X ... 18.0 1 L 4 F X X \n", + "95411 X ... 0.0 1 L 1 G C 1 \n", + "\n", + " MDMAUD_A CLUSTER2 GEOCODE2 \n", + "0 X 39.0 C \n", + "1 X 1.0 A \n", + "2 X 60.0 C \n", + "3 X 41.0 C \n", + "4 X 26.0 A \n", + "... ... ... ... \n", + "95407 X 12.0 C \n", + "95408 X 2.0 A \n", + "95409 X 34.0 B \n", + "95410 X 11.0 A \n", + "95411 C 12.0 C \n", + "\n", + "[95412 rows x 481 columns]" + ] + }, + "execution_count": 245, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(r\"learningSet.txt\")\n", + "data" + ] + }, + { + "cell_type": "markdown", + "id": "498bfe98", + "metadata": {}, + "source": [ + "#### Check for null values in all the columns" + ] + }, + { + "cell_type": "code", + "execution_count": 246, + "id": "cbe29b65", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ODATEDW 0\n", + "OSOURCE 0\n", + "TCODE 0\n", + "STATE 0\n", + "ZIP 0\n", + " ... 
\n", + "MDMAUD_R 0\n", + "MDMAUD_F 0\n", + "MDMAUD_A 0\n", + "CLUSTER2 132\n", + "GEOCODE2 132\n", + "Length: 481, dtype: int64" + ] + }, + "execution_count": 246, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isna().sum()\n", + "# most columns might have missing values, if we drop NAs, we are going to remove basically all rows\n", + "# so an option is to *remove the columns* with the most missing values" + ] + }, + { + "cell_type": "markdown", + "id": "98b12828", + "metadata": {}, + "source": [ + "#### Identify columns that have over 85% missing values" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "id": "2069937d", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ODATEDW 0.000000\n", + "OSOURCE 0.000000\n", + "TCODE 0.000000\n", + "STATE 0.000000\n", + "ZIP 0.000000\n", + " ... \n", + "MDMAUD_R 0.000000\n", + "MDMAUD_F 0.000000\n", + "MDMAUD_A 0.000000\n", + "CLUSTER2 0.001383\n", + "GEOCODE2 0.001383\n", + "Length: 481, dtype: float64" + ] + }, + "execution_count": 247, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_df = data.isna().sum()/len(data)\n", + "null_df\n", + "# we need to establish a percentage of NAs to drop columns or not" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "id": "5dd5e223", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
column_namespercentage_of_nulls
0ODATEDW0.000000
1OSOURCE0.000000
2TCODE0.000000
3STATE0.000000
4ZIP0.000000
.........
476MDMAUD_R0.000000
477MDMAUD_F0.000000
478MDMAUD_A0.000000
479CLUSTER20.001383
480GEOCODE20.001383
\n", + "

481 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " column_names percentage_of_nulls\n", + "0 ODATEDW 0.000000\n", + "1 OSOURCE 0.000000\n", + "2 TCODE 0.000000\n", + "3 STATE 0.000000\n", + "4 ZIP 0.000000\n", + ".. ... ...\n", + "476 MDMAUD_R 0.000000\n", + "477 MDMAUD_F 0.000000\n", + "478 MDMAUD_A 0.000000\n", + "479 CLUSTER2 0.001383\n", + "480 GEOCODE2 0.001383\n", + "\n", + "[481 rows x 2 columns]" + ] + }, + "execution_count": 248, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_df = pd.DataFrame(null_df).reset_index()\n", + "null_df.columns = ['column_names','percentage_of_nulls']\n", + "null_df" + ] + }, + { + "cell_type": "code", + "execution_count": 249, + "id": "7e27ac51", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
column_namespercentage_of_nulls
414RDATE_50.999906
436RAMNT_50.999906
412RDATE_30.997464
434RAMNT_30.997464
413RDATE_40.997055
.........
168ETHC30.000000
167ETHC20.000000
166ETHC10.000000
165HHD120.000000
240TPE110.000000
\n", + "

481 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " column_names percentage_of_nulls\n", + "414 RDATE_5 0.999906\n", + "436 RAMNT_5 0.999906\n", + "412 RDATE_3 0.997464\n", + "434 RAMNT_3 0.997464\n", + "413 RDATE_4 0.997055\n", + ".. ... ...\n", + "168 ETHC3 0.000000\n", + "167 ETHC2 0.000000\n", + "166 ETHC1 0.000000\n", + "165 HHD12 0.000000\n", + "240 TPE11 0.000000\n", + "\n", + "[481 rows x 2 columns]" + ] + }, + "execution_count": 249, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_df = null_df.sort_values(by = 'percentage_of_nulls', ascending = False )\n", + "null_df" + ] + }, + { + "cell_type": "code", + "execution_count": 250, + "id": "2b8e862f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
column_namespercentage_of_nulls
414RDATE_50.999906
436RAMNT_50.999906
412RDATE_30.997464
434RAMNT_30.997464
413RDATE_40.997055
435RAMNT_40.997055
437RAMNT_60.991867
415RDATE_60.991867
446RAMNT_150.923888
424RDATE_150.923888
432RDATE_230.917631
454RAMNT_230.917631
429RDATE_200.917327
451RAMNT_200.917327
438RAMNT_70.906773
416RDATE_70.906773
448RAMNT_170.901469
426RDATE_170.901469
430RDATE_210.900296
452RAMNT_210.900296
441RAMNT_100.890360
419RDATE_100.890360
422RDATE_130.871609
444RAMNT_130.871609
23NUMCHLD0.870184
\n", + "
" + ], + "text/plain": [ + " column_names percentage_of_nulls\n", + "414 RDATE_5 0.999906\n", + "436 RAMNT_5 0.999906\n", + "412 RDATE_3 0.997464\n", + "434 RAMNT_3 0.997464\n", + "413 RDATE_4 0.997055\n", + "435 RAMNT_4 0.997055\n", + "437 RAMNT_6 0.991867\n", + "415 RDATE_6 0.991867\n", + "446 RAMNT_15 0.923888\n", + "424 RDATE_15 0.923888\n", + "432 RDATE_23 0.917631\n", + "454 RAMNT_23 0.917631\n", + "429 RDATE_20 0.917327\n", + "451 RAMNT_20 0.917327\n", + "438 RAMNT_7 0.906773\n", + "416 RDATE_7 0.906773\n", + "448 RAMNT_17 0.901469\n", + "426 RDATE_17 0.901469\n", + "430 RDATE_21 0.900296\n", + "452 RAMNT_21 0.900296\n", + "441 RAMNT_10 0.890360\n", + "419 RDATE_10 0.890360\n", + "422 RDATE_13 0.871609\n", + "444 RAMNT_13 0.871609\n", + "23 NUMCHLD 0.870184" + ] + }, + "execution_count": 250, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "threshold = 0.85\n", + "\n", + "condition = null_df['percentage_of_nulls'] > threshold\n", + "columns_above_threshold = null_df[condition]\n", + "columns_above_threshold" + ] + }, + { + "cell_type": "markdown", + "id": "8416a5d2", + "metadata": {}, + "source": [ + "Remove those columns from the dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "id": "25aa7d8e", + "metadata": {}, + "outputs": [], + "source": [ + "drop_column_list = list(columns_above_threshold['column_names'])\n", + "data = data.drop(columns = drop_column_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "id": "38744dde", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ODATEDWOSOURCETCODESTATEZIPMAILCODEPVASTATEDOBNOEXCHRECINHSE...TARGET_DHPHONE_DRFA_2RRFA_2FRFA_2AMDMAUD_RMDMAUD_FMDMAUD_ACLUSTER2GEOCODE2
08901GRI0IL6108137120...0.00L4EXXX39.0C
19401BOA1CA9132652020...0.00L2GXXX1.0A
29001AMH1NC2701700...0.01L4EXXX60.0C
38701BRY0CA9595328010...0.01L4EXXX41.0C
486010FL3317620010X...0.01L2FXXX26.0A
..................................................................
954079601ASE1AK9950400...0.00L1GXXX12.0C
954089601DCD1TX7737950010...0.01L1FXXX2.0A
954099501MBC1MI4891038010...0.01L3EXXX34.0B
954108601PRV0CA9132040050X...18.01L4FXXX11.0A
954118801MCC2NC2840918010X...0.01L1GC1C12.0C
\n", + "

95412 rows × 456 columns

\n", + "
" + ], + "text/plain": [ + " ODATEDW OSOURCE TCODE STATE ZIP MAILCODE PVASTATE DOB NOEXCH \\\n", + "0 8901 GRI 0 IL 61081 3712 0 \n", + "1 9401 BOA 1 CA 91326 5202 0 \n", + "2 9001 AMH 1 NC 27017 0 0 \n", + "3 8701 BRY 0 CA 95953 2801 0 \n", + "4 8601 0 FL 33176 2001 0 \n", + "... ... ... ... ... ... ... ... ... ... \n", + "95407 9601 ASE 1 AK 99504 0 0 \n", + "95408 9601 DCD 1 TX 77379 5001 0 \n", + "95409 9501 MBC 1 MI 48910 3801 0 \n", + "95410 8601 PRV 0 CA 91320 4005 0 \n", + "95411 8801 MCC 2 NC 28409 1801 0 \n", + "\n", + " RECINHSE ... TARGET_D HPHONE_D RFA_2R RFA_2F RFA_2A MDMAUD_R MDMAUD_F \\\n", + "0 ... 0.0 0 L 4 E X X \n", + "1 ... 0.0 0 L 2 G X X \n", + "2 ... 0.0 1 L 4 E X X \n", + "3 ... 0.0 1 L 4 E X X \n", + "4 X ... 0.0 1 L 2 F X X \n", + "... ... ... ... ... ... ... ... ... ... \n", + "95407 ... 0.0 0 L 1 G X X \n", + "95408 ... 0.0 1 L 1 F X X \n", + "95409 ... 0.0 1 L 3 E X X \n", + "95410 X ... 18.0 1 L 4 F X X \n", + "95411 X ... 0.0 1 L 1 G C 1 \n", + "\n", + " MDMAUD_A CLUSTER2 GEOCODE2 \n", + "0 X 39.0 C \n", + "1 X 1.0 A \n", + "2 X 60.0 C \n", + "3 X 41.0 C \n", + "4 X 26.0 A \n", + "... ... ... ... 
\n", + "95407 X 12.0 C \n", + "95408 X 2.0 A \n", + "95409 X 34.0 B \n", + "95410 X 11.0 A \n", + "95411 C 12.0 C \n", + "\n", + "[95412 rows x 456 columns]" + ] + }, + "execution_count": 252, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data\n", + "# dropped from 481 to 456 columns" + ] + }, + { + "cell_type": "markdown", + "id": "e9bbb0d7", + "metadata": {}, + "source": [ + "#### Categorical columns" + ] + }, + { + "cell_type": "code", + "execution_count": 253, + "id": "8e3f1b32", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['OSOURCE',\n", + " 'STATE',\n", + " 'ZIP',\n", + " 'MAILCODE',\n", + " 'PVASTATE',\n", + " 'NOEXCH',\n", + " 'RECINHSE',\n", + " 'RECP3',\n", + " 'RECPGVG',\n", + " 'RECSWEEP',\n", + " 'MDMAUD',\n", + " 'DOMAIN',\n", + " 'CLUSTER',\n", + " 'AGEFLAG',\n", + " 'HOMEOWNR',\n", + " 'CHILD03',\n", + " 'CHILD07',\n", + " 'CHILD12',\n", + " 'CHILD18',\n", + " 'GENDER',\n", + " 'DATASRCE',\n", + " 'SOLP3',\n", + " 'SOLIH',\n", + " 'MAJOR',\n", + " 'GEOCODE',\n", + " 'COLLECT1',\n", + " 'VETERANS',\n", + " 'BIBLE',\n", + " 'CATLG',\n", + " 'HOMEE',\n", + " 'PETS',\n", + " 'CDPLAY',\n", + " 'STEREO',\n", + " 'PCOWNERS',\n", + " 'PHOTO',\n", + " 'CRAFTS',\n", + " 'FISHER',\n", + " 'GARDENIN',\n", + " 'BOATS',\n", + " 'WALKER',\n", + " 'KIDSTUFF',\n", + " 'CARDS',\n", + " 'PLATES',\n", + " 'LIFESRC',\n", + " 'PEPSTRFL',\n", + " 'RFA_2',\n", + " 'RFA_3',\n", + " 'RFA_4',\n", + " 'RFA_5',\n", + " 'RFA_6',\n", + " 'RFA_7',\n", + " 'RFA_8',\n", + " 'RFA_9',\n", + " 'RFA_10',\n", + " 'RFA_11',\n", + " 'RFA_12',\n", + " 'RFA_13',\n", + " 'RFA_14',\n", + " 'RFA_15',\n", + " 'RFA_16',\n", + " 'RFA_17',\n", + " 'RFA_18',\n", + " 'RFA_19',\n", + " 'RFA_20',\n", + " 'RFA_21',\n", + " 'RFA_22',\n", + " 'RFA_23',\n", + " 'RFA_24',\n", + " 'RFA_2R',\n", + " 'RFA_2A',\n", + " 'MDMAUD_R',\n", + " 'MDMAUD_F',\n", + " 'MDMAUD_A',\n", + " 'GEOCODE2']" + ] + }, + "execution_count": 253, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "categorical_columns = data.select_dtypes(exclude=['number']).columns.tolist()\n", + "categorical_columns" + ] + }, + { + "cell_type": "markdown", + "id": "66161dec", + "metadata": {}, + "source": [ + "Create a new empty list called drop_list. We will append this list and then drop all the columns in this list later:\n", + "\n", + "OSOURCE - symbol definitions not provided, too many categories\n", + "\n", + "ZIP CODE - we are including state already" + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "id": "de53d72d", + "metadata": {}, + "outputs": [], + "source": [ + "drop_list = []\n", + "drop_list.append('OSOURCE')\n", + "drop_list.append('ZIP')\n", + "data.drop(columns=drop_list, inplace=True)" + ] + }, + { + "cell_type": "markdown", + "id": "00911ba5", + "metadata": {}, + "source": [ + "#### Reduce the number of categories in the column GENDER. " + ] + }, + { + "cell_type": "markdown", + "id": "3bbd4f17", + "metadata": {}, + "source": [ + "The column should only have either \"M\" for males, \"F\" for females, and \"other\" for all the rest." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 255, + "id": "9e3a5c53", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GENDER\n", + "F 51277\n", + "M 39094\n", + " 2957\n", + "U 1715\n", + "J 365\n", + "C 2\n", + "A 2\n", + "Name: count, dtype: int64\n", + "0\n" + ] + } + ], + "source": [ + "print (data['GENDER'].value_counts())\n", + "print (data['GENDER'].isna().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "id": "c7fce732", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "data['GENDER'] = data['GENDER'].fillna('F')\n", + "data['GENDER'] = data['GENDER'].apply(lambda x: x if x in ['M', 'F'] else 'other')" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "id": "2aea059b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "GENDER\n", + "F 51277\n", + "M 39094\n", + "other 5041\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 257, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['GENDER'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "84790566", + "metadata": {}, + "source": [ + "#### Numerical columns" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "id": "0656fae2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ODATEDW',\n", + " 'TCODE',\n", + " 'DOB',\n", + " 'AGE',\n", + " 'INCOME',\n", + " 'WEALTH1',\n", + " 'HIT',\n", + " 'MBCRAFT',\n", + " 'MBGARDEN',\n", + " 'MBBOOKS',\n", + " 'MBCOLECT',\n", + " 'MAGFAML',\n", + " 'MAGFEM',\n", + " 'MAGMALE',\n", + " 'PUBGARDN',\n", + " 'PUBCULIN',\n", + " 'PUBHLTH',\n", + " 'PUBDOITY',\n", + " 'PUBNEWFN',\n", + " 'PUBPHOTO',\n", + " 'PUBOPP',\n", + " 'MALEMILI',\n", + " 'MALEVET',\n", + " 'VIETVETS',\n", + " 'WWIIVETS',\n", + " 'LOCALGOV',\n", + " 'STATEGOV',\n", + " 'FEDGOV',\n", + " 'WEALTH2',\n", + " 'POP901',\n", + " 
'POP902',\n", + " 'POP903',\n", + " 'POP90C1',\n", + " 'POP90C2',\n", + " 'POP90C3',\n", + " 'POP90C4',\n", + " 'POP90C5',\n", + " 'ETH1',\n", + " 'ETH2',\n", + " 'ETH3',\n", + " 'ETH4',\n", + " 'ETH5',\n", + " 'ETH6',\n", + " 'ETH7',\n", + " 'ETH8',\n", + " 'ETH9',\n", + " 'ETH10',\n", + " 'ETH11',\n", + " 'ETH12',\n", + " 'ETH13',\n", + " 'ETH14',\n", + " 'ETH15',\n", + " 'ETH16',\n", + " 'AGE901',\n", + " 'AGE902',\n", + " 'AGE903',\n", + " 'AGE904',\n", + " 'AGE905',\n", + " 'AGE906',\n", + " 'AGE907',\n", + " 'CHIL1',\n", + " 'CHIL2',\n", + " 'CHIL3',\n", + " 'AGEC1',\n", + " 'AGEC2',\n", + " 'AGEC3',\n", + " 'AGEC4',\n", + " 'AGEC5',\n", + " 'AGEC6',\n", + " 'AGEC7',\n", + " 'CHILC1',\n", + " 'CHILC2',\n", + " 'CHILC3',\n", + " 'CHILC4',\n", + " 'CHILC5',\n", + " 'HHAGE1',\n", + " 'HHAGE2',\n", + " 'HHAGE3',\n", + " 'HHN1',\n", + " 'HHN2',\n", + " 'HHN3',\n", + " 'HHN4',\n", + " 'HHN5',\n", + " 'HHN6',\n", + " 'MARR1',\n", + " 'MARR2',\n", + " 'MARR3',\n", + " 'MARR4',\n", + " 'HHP1',\n", + " 'HHP2',\n", + " 'DW1',\n", + " 'DW2',\n", + " 'DW3',\n", + " 'DW4',\n", + " 'DW5',\n", + " 'DW6',\n", + " 'DW7',\n", + " 'DW8',\n", + " 'DW9',\n", + " 'HV1',\n", + " 'HV2',\n", + " 'HV3',\n", + " 'HV4',\n", + " 'HU1',\n", + " 'HU2',\n", + " 'HU3',\n", + " 'HU4',\n", + " 'HU5',\n", + " 'HHD1',\n", + " 'HHD2',\n", + " 'HHD3',\n", + " 'HHD4',\n", + " 'HHD5',\n", + " 'HHD6',\n", + " 'HHD7',\n", + " 'HHD8',\n", + " 'HHD9',\n", + " 'HHD10',\n", + " 'HHD11',\n", + " 'HHD12',\n", + " 'ETHC1',\n", + " 'ETHC2',\n", + " 'ETHC3',\n", + " 'ETHC4',\n", + " 'ETHC5',\n", + " 'ETHC6',\n", + " 'HVP1',\n", + " 'HVP2',\n", + " 'HVP3',\n", + " 'HVP4',\n", + " 'HVP5',\n", + " 'HVP6',\n", + " 'HUR1',\n", + " 'HUR2',\n", + " 'RHP1',\n", + " 'RHP2',\n", + " 'RHP3',\n", + " 'RHP4',\n", + " 'HUPA1',\n", + " 'HUPA2',\n", + " 'HUPA3',\n", + " 'HUPA4',\n", + " 'HUPA5',\n", + " 'HUPA6',\n", + " 'HUPA7',\n", + " 'RP1',\n", + " 'RP2',\n", + " 'RP3',\n", + " 'RP4',\n", + " 'MSA',\n", + " 'ADI',\n", + " 
'DMA',\n", + " 'IC1',\n", + " 'IC2',\n", + " 'IC3',\n", + " 'IC4',\n", + " 'IC5',\n", + " 'IC6',\n", + " 'IC7',\n", + " 'IC8',\n", + " 'IC9',\n", + " 'IC10',\n", + " 'IC11',\n", + " 'IC12',\n", + " 'IC13',\n", + " 'IC14',\n", + " 'IC15',\n", + " 'IC16',\n", + " 'IC17',\n", + " 'IC18',\n", + " 'IC19',\n", + " 'IC20',\n", + " 'IC21',\n", + " 'IC22',\n", + " 'IC23',\n", + " 'HHAS1',\n", + " 'HHAS2',\n", + " 'HHAS3',\n", + " 'HHAS4',\n", + " 'MC1',\n", + " 'MC2',\n", + " 'MC3',\n", + " 'TPE1',\n", + " 'TPE2',\n", + " 'TPE3',\n", + " 'TPE4',\n", + " 'TPE5',\n", + " 'TPE6',\n", + " 'TPE7',\n", + " 'TPE8',\n", + " 'TPE9',\n", + " 'PEC1',\n", + " 'PEC2',\n", + " 'TPE10',\n", + " 'TPE11',\n", + " 'TPE12',\n", + " 'TPE13',\n", + " 'LFC1',\n", + " 'LFC2',\n", + " 'LFC3',\n", + " 'LFC4',\n", + " 'LFC5',\n", + " 'LFC6',\n", + " 'LFC7',\n", + " 'LFC8',\n", + " 'LFC9',\n", + " 'LFC10',\n", + " 'OCC1',\n", + " 'OCC2',\n", + " 'OCC3',\n", + " 'OCC4',\n", + " 'OCC5',\n", + " 'OCC6',\n", + " 'OCC7',\n", + " 'OCC8',\n", + " 'OCC9',\n", + " 'OCC10',\n", + " 'OCC11',\n", + " 'OCC12',\n", + " 'OCC13',\n", + " 'EIC1',\n", + " 'EIC2',\n", + " 'EIC3',\n", + " 'EIC4',\n", + " 'EIC5',\n", + " 'EIC6',\n", + " 'EIC7',\n", + " 'EIC8',\n", + " 'EIC9',\n", + " 'EIC10',\n", + " 'EIC11',\n", + " 'EIC12',\n", + " 'EIC13',\n", + " 'EIC14',\n", + " 'EIC15',\n", + " 'EIC16',\n", + " 'OEDC1',\n", + " 'OEDC2',\n", + " 'OEDC3',\n", + " 'OEDC4',\n", + " 'OEDC5',\n", + " 'OEDC6',\n", + " 'OEDC7',\n", + " 'EC1',\n", + " 'EC2',\n", + " 'EC3',\n", + " 'EC4',\n", + " 'EC5',\n", + " 'EC6',\n", + " 'EC7',\n", + " 'EC8',\n", + " 'SEC1',\n", + " 'SEC2',\n", + " 'SEC3',\n", + " 'SEC4',\n", + " 'SEC5',\n", + " 'AFC1',\n", + " 'AFC2',\n", + " 'AFC3',\n", + " 'AFC4',\n", + " 'AFC5',\n", + " 'AFC6',\n", + " 'VC1',\n", + " 'VC2',\n", + " 'VC3',\n", + " 'VC4',\n", + " 'ANC1',\n", + " 'ANC2',\n", + " 'ANC3',\n", + " 'ANC4',\n", + " 'ANC5',\n", + " 'ANC6',\n", + " 'ANC7',\n", + " 'ANC8',\n", + " 'ANC9',\n", + " 'ANC10',\n", 
+ " 'ANC11',\n", + " 'ANC12',\n", + " 'ANC13',\n", + " 'ANC14',\n", + " 'ANC15',\n", + " 'POBC1',\n", + " 'POBC2',\n", + " 'LSC1',\n", + " 'LSC2',\n", + " 'LSC3',\n", + " 'LSC4',\n", + " 'VOC1',\n", + " 'VOC2',\n", + " 'VOC3',\n", + " 'HC1',\n", + " 'HC2',\n", + " 'HC3',\n", + " 'HC4',\n", + " 'HC5',\n", + " 'HC6',\n", + " 'HC7',\n", + " 'HC8',\n", + " 'HC9',\n", + " 'HC10',\n", + " 'HC11',\n", + " 'HC12',\n", + " 'HC13',\n", + " 'HC14',\n", + " 'HC15',\n", + " 'HC16',\n", + " 'HC17',\n", + " 'HC18',\n", + " 'HC19',\n", + " 'HC20',\n", + " 'HC21',\n", + " 'MHUC1',\n", + " 'MHUC2',\n", + " 'AC1',\n", + " 'AC2',\n", + " 'ADATE_2',\n", + " 'ADATE_3',\n", + " 'ADATE_4',\n", + " 'ADATE_5',\n", + " 'ADATE_6',\n", + " 'ADATE_7',\n", + " 'ADATE_8',\n", + " 'ADATE_9',\n", + " 'ADATE_10',\n", + " 'ADATE_11',\n", + " 'ADATE_12',\n", + " 'ADATE_13',\n", + " 'ADATE_14',\n", + " 'ADATE_15',\n", + " 'ADATE_16',\n", + " 'ADATE_17',\n", + " 'ADATE_18',\n", + " 'ADATE_19',\n", + " 'ADATE_20',\n", + " 'ADATE_21',\n", + " 'ADATE_22',\n", + " 'ADATE_23',\n", + " 'ADATE_24',\n", + " 'CARDPROM',\n", + " 'MAXADATE',\n", + " 'NUMPROM',\n", + " 'CARDPM12',\n", + " 'NUMPRM12',\n", + " 'RDATE_8',\n", + " 'RDATE_9',\n", + " 'RDATE_11',\n", + " 'RDATE_12',\n", + " 'RDATE_14',\n", + " 'RDATE_16',\n", + " 'RDATE_18',\n", + " 'RDATE_19',\n", + " 'RDATE_22',\n", + " 'RDATE_24',\n", + " 'RAMNT_8',\n", + " 'RAMNT_9',\n", + " 'RAMNT_11',\n", + " 'RAMNT_12',\n", + " 'RAMNT_14',\n", + " 'RAMNT_16',\n", + " 'RAMNT_18',\n", + " 'RAMNT_19',\n", + " 'RAMNT_22',\n", + " 'RAMNT_24',\n", + " 'RAMNTALL',\n", + " 'NGIFTALL',\n", + " 'CARDGIFT',\n", + " 'MINRAMNT',\n", + " 'MINRDATE',\n", + " 'MAXRAMNT',\n", + " 'MAXRDATE',\n", + " 'LASTGIFT',\n", + " 'LASTDATE',\n", + " 'FISTDATE',\n", + " 'NEXTDATE',\n", + " 'TIMELAG',\n", + " 'AVGGIFT',\n", + " 'CONTROLN',\n", + " 'TARGET_B',\n", + " 'TARGET_D',\n", + " 'HPHONE_D',\n", + " 'RFA_2F',\n", + " 'CLUSTER2']" + ] + }, + "execution_count": 258, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "numerical_columns = data.select_dtypes(include=['number']).columns.tolist()\n", + "numerical_columns" + ] + }, + { + "cell_type": "markdown", + "id": "f1bf093f", + "metadata": {}, + "source": [ + "#### Clean the columns GEOCODE2, WEALTH1, ADI, DMA,and MSA." + ] + }, + { + "cell_type": "markdown", + "id": "3413f398", + "metadata": {}, + "source": [ + "#### GEOCODE2" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "id": "a1b4afcd", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GEOCODE2\n", + "A 34484\n", + "B 28505\n", + "D 16580\n", + "C 15524\n", + " 187\n", + "Name: count, dtype: int64\n", + "object\n", + "132\n" + ] + } + ], + "source": [ + "print (data['GEOCODE2'].value_counts())\n", + "print (data['GEOCODE2'].dtype)\n", + "print (data['GEOCODE2'].isna().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "id": "38024672", + "metadata": {}, + "outputs": [], + "source": [ + "data = data[data['GEOCODE2'].notna()]\n", + "data['GEOCODE2'].replace(' ', 'A', inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "id": "0f73334d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GEOCODE2\n", + "A 34671\n", + "B 28505\n", + "D 16580\n", + "C 15524\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "print (data['GEOCODE2'].value_counts())\n", + "# GROUP BASED IMPUTATION\n", + "# dropped NAs and replaced spaces by category A which is the MODE VALUE" + ] + }, + { + "cell_type": "markdown", + "id": "09fd8a18", + "metadata": {}, + "source": [ + "#### WEALTH1" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "id": "96027cdc", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WEALTH1\n", + "9.0 7580\n", + "8.0 
6785\n", + "7.0 6196\n", + "6.0 5823\n", + "5.0 5277\n", + "4.0 4808\n", + "3.0 4233\n", + "2.0 4083\n", + "1.0 3452\n", + "0.0 2411\n", + "Name: count, dtype: int64\n", + "float64\n", + "44632\n" + ] + } + ], + "source": [ + "print (data['WEALTH1'].value_counts())\n", + "print (data['WEALTH1'].dtype)\n", + "print (data['WEALTH1'].isna().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "id": "cf0eb193", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
column_namespercentage_of_nulls
26WEALTH10.46883
\n", + "
" + ], + "text/plain": [ + " column_names percentage_of_nulls\n", + "26 WEALTH1 0.46883" + ] + }, + "execution_count": 263, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_df.loc[null_df['column_names'] == 'WEALTH1']\n", + "# 47% of missing values" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "id": "f1ea590e", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABXpElEQVR4nO3dd3SUZfo+8GtKZiYJ6b0XWiChhISSIAICQbBgRxAQBVcEXZF1V1m/qy77U6wsugqIIoiKYsG2ohCF0GtI6J2QhDAhpPeZZOb9/TFliSlMJlOSl+tzzpxj3rwzuROQufKU+5EIgiCAiIiISCSkzi6AiIiIyJYYboiIiEhUGG6IiIhIVBhuiIiISFQYboiIiEhUGG6IiIhIVBhuiIiISFTkzi7A0fR6PS5fvgwPDw9IJBJnl0NEREQWEAQBVVVVCA0NhVTa9tjMDRduLl++jIiICGeXQURERFbIz89HeHh4m/fccOHGw8MDgOGH4+np6eRqiIiIyBKVlZWIiIgwv4+35YYLN6apKE9PT4YbIiKiLsaSJSVcUExERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKIid3YBREREZLBuX57F904dGmnHSro2jtwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoyJ1dwLJly/Dmm29CrVYjPj4eS5cuxYgRI1q8NyMjA6NHj252/eTJk4iLi7N3qUREJGLr9uVZfO/UoZF2rIQ6yqkjN+vXr8f8+fPxwgsvICsrCyNGjMCECROQl9f2X7DTp09DrVabHz179nRQxURERNTZOTXcLFmyBLNmzcLs2bPRp08fLF26FBEREVi+fHmbzwsMDERwcLD5IZPJWr1Xo9GgsrKyyYOIiIjEy2nhRqvVIjMzE2lpaU2up6WlYffu3W0+NzExESEhIRgzZgy2bt3a5r2LFy+Gl5eX+REREdHh2omIiKjzclq4KS4uhk6nQ1BQUJPrQUFBKCwsbPE5ISEhWLlyJb799lts2LABvXv3xpgxY7B9+/ZWv87ChQtRUVFhfuTn59v0+yAiIqLOxekLiiUSSZOPBUFods2kd+/e6N27t/njlJQU5Ofn46233sLNN9/c4nOUSiWUSqXtCiYiIqJOzWkjN/7+/pDJZM1GaYq
KipqN5rRl2LBhOHv2rK3LIyIioi7KaeFGoVAgKSkJ6enpTa6np6cjNTXV4tfJyspCSEiIrcsjIiKiLsqp01ILFizA9OnTkZycjJSUFKxcuRJ5eXmYM2cOAMN6mYKCAqxduxYAsHTpUkRHRyM+Ph5arRafffYZvv32W3z77bfO/DaIiIioE3FquJk8eTJKSkqwaNEiqNVqJCQkYOPGjYiKigIAqNXqJj1vtFotnn32WRQUFMDV1RXx8fH4+eefMXHiRGd9C0RERNTJSARBEJxdhCNVVlbCy8sLFRUV8PT0dHY5RETUSXSGDsWdoYbOqj3v3zxbioiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiERF7uwCiIhInNbty7P43qlDI+1YCd1oOHJDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREoiJ3dgFERGSZdfvyLL536tBIO1ZC1Llx5IaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRIXhhoiIiESF4YaIiIhEheGGiIiIRMXp4WbZsmWIiYmBSqVCUlISduzYYdHzdu3aBblcjoEDB9q3QCIiIupSnBpu1q9fj/nz5+OFF15AVlYWRowYgQkTJiAvL6/N51VUVGDGjBkYM2aMgyolIiKirsKp4WbJkiWYNWsWZs+ejT59+mDp0qWIiIjA8uXL23ze448/jqlTpyIlJcVBlRIREVFX4bRwo9VqkZmZibS0tCbX09LSsHv37laft3r1apw/fx4vvfSSRV9Ho9GgsrKyyYOIiIjEy2nhpri4GDqdDkFBQU2uBwUFobCwsMXnnD17Fs8//zw+//xzyOVyi77O4sWL4eXlZX5ERER0uHYiIiLqvJy+oFgikTT5WBCEZtcAQKfTYerUqfjnP/+JXr16Wfz6CxcuREVFhfmRn5/f4ZqJiIio87Js+MMO/P39IZPJmo3SFBUVNRvNAYCqqiocPHgQWVlZePLJJwEAer0egiBALpdj8+bNuOWWW5o9T6lUQqlU2uebICIiok7HaSM3CoUCSUlJSE9Pb3I9PT0dqampze739PTE0aNHkZ2dbX7MmTMHvXv3RnZ2NoYOHeqo0omIiKgTc9rIDQAsWLAA06dPR3JyMlJSUrBy5Urk5eVhzpw5AAxTSgUFBVi7di2kUikSEhKaPD8wMBAqlarZdSIiS6zb13bbiWtNHRppx0qIyJacGm4mT56MkpISLFq0CGq1GgkJCdi4cSOioqIAAGq1+ro9b4iIiIiu5dRwAwBz587F3LlzW/zcmjVr2nzuyy+/jJdfftn2RREREVGX5fTdUkRERES2xHBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKLCcENERESiwnBDREREosJwQ0RERKIid3YBRHRjWrcvz+J7pw6NtGMlRCQ2HLkhIiIiUWG4ISIiIlFhuCEiIiJRYbghIiIiUWG4ISIiIlFhuCEiIiJRYbghIiIiUWG4ISIiIlFhuCEiIiJRYbghIiIiUWG4ISIiIlFhuCEiIiJRYbghIiIiUWG4ISIiIlFhuCEiIiJRsSrc5OTk2LoOIiIiIpuwKtz06NEDo0ePxmeffYb6+np
b10RERERkNavCzeHDh5GYmIi//OUvCA4OxuOPP479+/fbujYiIiKidrMq3CQkJGDJkiUoKCjA6tWrUVhYiJtuugnx8fFYsmQJrl69aus6iYiIiCzSoQXFcrkcd999N7766iu8/vrrOH/+PJ599lmEh4djxowZUKvVtqqTiIiIyCIdCjcHDx7E3LlzERISgiVLluDZZ5/F+fPnsWXLFhQUFGDSpEm2qpOIiIjIInJrnrRkyRKsXr0ap0+fxsSJE7F27VpMnDgRUqkhK8XExOCDDz5AXFycTYslIiIiuh6rws3y5cvx6KOP4pFHHkFwcHCL90RGRmLVqlUdKo6IiIiovawKN+np6YiMjDSP1JgIgoD8/HxERkZCoVDg4YcftkmRRERERJayas1N9+7dUVxc3Ox6aWkpYmJiOlwUERERkbWsCjeCILR4vbq6GiqVqkMFEREREXVEu6alFixYAACQSCR48cUX4ebmZv6cTqfDvn37MHDgQJsWSESWW7cvz+J7pw6NtGMlRETO065wk5WVBcAwcnP06FEoFArz5xQKBQYMGIBnn33WthUSERERtUO7ws3WrVsBAI888gjeeecdeHp62qUoIiIiImtZtVtq9erVtq6DiIiIyCYsDjf33HMP1qxZA09PT9xzzz1t3rthw4YOF0ZERERkDYvDjZeXFyQSifm/iYiIiDoji8PNtVNRtpyWWrZsGd58802o1WrEx8dj6dKlGDFiRIv37ty5E8899xxOnTqF2tpaREVF4fHHH8czzzxjs3qIiIioa7NqzU1dXR0EQTBvBc/NzcV3332Hvn37Ii0tzeLXWb9+PebPn49ly5Zh+PDh+OCDDzBhwgScOHECkZHNt6m6u7vjySefRP/+/eHu7o6dO3fi8ccfh7u7O/70pz9Z860QtYlbq4mIuh6rmvhNmjQJa9euBQCUl5djyJAhePvttzFp0iQsX77c4tdZsmQJZs2ahdmzZ6NPnz5YunQpIiIiWn2NxMRETJkyBfHx8YiOjsa0adMwfvx47Nixw5pvg4iIiETIqnBz6NAh89TRN998g+DgYOTm5mLt2rV49913LXoNrVaLzMzMZiM9aWlp2L17t0WvkZWVhd27d2PkyJGt3qPRaFBZWdnkQUREROJlVbipra2Fh4cHAGDz5s245557IJVKMWzYMOTm5lr0GsXFxdDpdAgKCmpyPSgoCIWFhW0+Nzw8HEqlEsnJyZg3bx5mz57d6r2LFy+Gl5eX+REREWFRfURERNQ1WRVuevToge+//x75+fnYtGmTefSlqKio3Y39TDuwTARBaHbtj3bs2IGDBw9ixYoVWLp0Kb744otW7124cCEqKirMj/z8/HbVR0RERF2LVQuKX3zxRUydOhXPPPMMxowZg5SUFACGUZzExESLXsPf3x8ymazZKE1RUVGz0Zw/Mp083q9fP1y5cgUvv/wypkyZ0uK9SqUSSqXSopqIiIio67Nq5Oa+++5DXl4eDh48iF9//dV8fcyYMfj3v/9t0WsoFAokJSUhPT29yfX09HSkpqZaXIsgCNBoNBbfT0REROJm1cgNAAQHByM4OLjJtSFDhrTrNRYsWIDp06cjOTkZKSkpWLlyJfLy8jBnzhwAhimlgoIC886s999/H5GRkYiLiwNg6Hvz1ltv4amnnrL22yAiIiKRsSrc1NTU4LXXXsPvv/+OoqIi6PX6Jp+/cOGCRa8zefJklJSUYNGiRVCr1UhISMDGjRsRFRUFAFCr1cjL+1+fEb1ej4ULFyInJwdyuRzdu3fHa6+9hscff9yab4OIiIhEyKpwM3v2bGzbtg3Tp09HSEjIdRcAt2Xu3LmYO3dui59bs2ZNk4+feuopjtIQERFRm6wKN7/88gt+/vlnDB8+3Nb1EBEREXWIVQuKfXx84Ovra+taiIiIiDrMqnDzr3/9Cy+++CJqa2ttXQ8RERFRh1g1LfX222/j/PnzCAoKQnR0NFxcXJp8/tChQzYpjoiIiKi
9rAo3d911l43LICIiIrINq8LNSy+9ZOs6iIiIiGzCqjU3AFBeXo6PPvoICxcuRGlpKQDDdFRBQYHNiiMiIiJqL6tGbo4cOYKxY8fCy8sLFy9exGOPPQZfX1989913yM3NNXcUJiIiInI0q0ZuFixYgJkzZ+Ls2bNQqVTm6xMmTMD27dttVhwRERFRe1kVbg4cONDikQdhYWHNTvkmIiIiciSrwo1KpUJlZWWz66dPn0ZAQECHiyIiIiKyllXhZtKkSVi0aBEaGhoAABKJBHl5eXj++edx77332rRAIiIiovawKty89dZbuHr1KgIDA1FXV4eRI0eiR48e8PDwwCuvvGLrGomIiIgsZtVuKU9PT+zcuRNbt25FZmYm9Ho9Bg0ahLFjx9q6PiIiIqJ2aXe40ev1WLNmDTZs2ICLFy9CIpEgJiYGwcHBEAQBEonEHnUSERERWaRd01KCIODOO+/E7NmzUVBQgH79+iE+Ph65ubmYOXMm7r77bnvVSURERGSRdo3crFmzBtu3b8fvv/+O0aNHN/ncli1bcNddd2Ht2rWYMWOGTYskIiIislS7Rm6++OIL/P3vf28WbADglltuwfPPP4/PP//cZsURERERtVe7ws2RI0dw6623tvr5CRMm4PDhwx0uioiIiMha7ZqWKi0tRVBQUKufDwoKQllZWYeLIvFaty/P4nunDo20YyVERCRW7Rq50el0kMtbz0MymQyNjY0dLoqIiIjIWu0auREEATNnzoRSqWzx8xqNxiZFEREREVmrXeHm4Ycfvu493ClFREREztSucLN69Wp71UFERERkE1adLUVERETUWTHcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDDcEBERkagw3BAREZGoMNwQERGRqDg93CxbtgwxMTFQqVRISkrCjh07Wr13w4YNGDduHAICAuDp6YmUlBRs2rTJgdUSERFRZ+fUcLN+/XrMnz8fL7zwArKysjBixAhMmDABeXl5Ld6/fft2jBs3Dhs3bkRmZiZGjx6NO+64A1lZWQ6unIiIiDoruTO/+JIlSzBr1izMnj0bALB06VJs2rQJy5cvx+LFi5vdv3Tp0iYfv/rqq/jhhx/w008/ITExscWvodFooNFozB9XVlba7hsgIiKiTsdpIzdarRaZmZlIS0trcj0tLQ27d++26DX0ej2qqqrg6+vb6j2LFy+Gl5eX+REREdGhuomIiKhzc1q4KS4uhk6nQ1BQUJPrQUFBKCwstOg13n77bdTU1OCBBx5o9Z6FCxeioqLC/MjPz+9Q3URERNS5OXVaCgAkEkmTjwVBaHatJV988QVefvll/PDDDwgMDGz1PqVSCaVS2eE6iYiIqGtwWrjx9/eHTCZrNkpTVFTUbDTnj9avX49Zs2bh66+/xtixY+1ZJhEREXUxTpuWUigUSEpKQnp6epPr6enpSE1NbfV5X3zxBWbOnIl169bhtttus3eZREREnY5eEFBV3+DsMjotp05LLViwANOnT0dycjJSUlKwcuVK5OXlYc6cOQAM62UKCgqwdu1aAIZgM2PGDLzzzjsYNmyYedTH1dUVXl5eTvs+iIiIHOHE5Qrsv1iK3JJavPzjcbw7JRET+4U4u6xOx6nhZvLkySgpKcGiRYugVquRkJCAjRs3IioqCgCgVqub9Lz54IMP0NjYiHnz5mHevHnm6w8//DDWrFnj6PKJiIgcpqC8Dp/vy4NwzbVnvz6M7gHd0DvYw2l1dUZOX1A8d+5czJ07t8XP/TGwZGRk2L8gIiKiTkYvCPgxuwACgN5BHhjbNwiH88ux81w
xHv/0IH548iZ4ubo4u8xOw+nHLxAREVHbMnPLkF9WB4VcirsSwxDm7Yp3pyQizNsVF0tq8fqvp5xdYqfCcENERNSJ1Wl1+PWYYY3p2D5B5hEaX3cF3rivPwDgx+zLqNPqnFZjZ8NwQ0RE1IkdvlSOugYdAropkRLr1+RzKbF+iPR1Q7WmEZuOW9YA90bAcENERNSJHb5UDgBIjvaBTNq0ya1UKsE9g8IAAN9kXnJ0aZ0Www0REVEnVVarRW5JLSQA+od7t3jPvYPCAQC7zhfjcnmd44rrxJy+W4ocZ92+vOvfZDR1aKQdKyEiIkscyS8HAMT4u7e6GyrC1w3DYn2x90IpvssqwLzRPRxYYefEkRsiIqJO6vClCgDAgAjvNu+7LykCALDhEKemAIYbIiKiTqmwoh6FlfWQSSVICG27C//4+CDIpBKcv1qDAk5NMdwQEREBhkZ5l8vrkFdSg8vldRAE4fpPsqOjBYZRm15BHnBVyNq810Plgn5hhgC053yJ3Wvr7LjmhoiIbnjaRj0+3pmDC8U15msDwr3wQHIEJBJJG8+0n3NFVQCAviGWHa2Q2t0P2fnl2HO+BPclhduztE6PIzdERHTD++dPx3GhuAZyqQS+7gpIJYb1LltOFzmlnjqtDpfKDNNLPQItDTf+AIA954udPurkbAw3RER0Q1u3Lw+f78uDBIados+m9cakgYbeMb+fLDJPDznS+avVEAAEeCgtPjMqKcoHCpkUlyvqkVtSa98COzmGGyIiumEVV2vwz5+OAwDG9Q1CXLAnAGBwtC+Gdzd0A/4huwANOr1D6zpbVA0A6BnYzeLnuCpkSIz0BgDsvsHX3TDcEBHRDeuLfXnQNOrRP9wLI3sFNPncrQkh8HZzQa1Wh2xjvxlHEATBvN6mRzvCDQCkGAPZ7vPFNq+rK2G4ISKiG1KDTo9P9+YCAB4dHtNs4bBMKkGq8SynXecct46ltEaLstoGyCQSxPi7t+u5pnU3ey+U3NDrbhhuiIjohvTLsUIUVWkQ4KHExH4hLd6THO0LpVyKoiqNearI3kxfJ9LPDUp521vA/2hghDdcXWQortbinIPq7YwYboiI6Ia0ZlcOAOChoZFQyFt+O1S5yJAc5QPAMHrjCOesWG9jopBL0S/c0O/GkVNpnQ3DDRER2Z0gCCiu0kCn7xxTJccKKnAorxwuMgkeGhrV5r0p3f0hgWFEpaRaY9e69IKAHGOvne4B7Q83gGH0BvjfaeI3IjbxIyIiu9I06PBV5iWcVFfC1UWGuGAPjOgZgGAvldNq+vmoGgCQ1jcYAR7KNu/1dVege2A3nCuqxrGCCozsHWi3uq5U1qOuQQeFTIpQb1erXqO/ceTmyCXHb2HvLDhyQ0REdlNao8XybedxUl0JAKhr0CErvxwf7byAyroGp9QkCAI2HSsEANyaEGzRc0xnOx29bN/AYBq1ifJzg0xqXWfkAeHeAICT6kpoGnW2Kq1LYbghIiK70OkFrN1zEUVVGnio5Hj85lg8NiIWwZ4q1Gp1+CbzEvRO2NFzrqgaF4proJBJMap3wPWfAKBvqCckAC6X16O0Rmu32kzhpr27pK4V7uMKX3cFGnQCTqqrbFVal8JwQ0REdnEorwxFVRq4usgwd1QPRPm5I8bfHQ8OjoCLTIJzV6sdtkj3Wr8aR22G9/CDh8qy7r/dlHLEBBgCxzE7dSwWBAEXbRBuJBLJNVNT5bYorcthuCEiIpur0+rw+8krAIDRcYFNjhAI9FThtn6hAIDNJ66gwsHTU5tOtG9KysR06vYxO01NnSuqRo1WBxeZBGE+1q23MTFNTd2oO6YYboiIuji9IOD81WpUaxqdXYrZ6t05qKxvhLebC4bF+Db7/OBoH0T5ukGnF7Avx3FHBeSX1uJYQSWkEmBsn6B2PbdviGFq6lJZHfJLbX92096cUgBAhK8b5NKOvT0PiLixFxUz3BARdVE6vYADOaVYkn4Gq3bm4O3Np7Hz7FWnb7e
u0TRiecZ5AMC4PkGQy5q/1UgkEqT2MHTT3Z9TivoGxyx83XzCMJqUHO0Lv25t75L6Iw+VC6KN00WbjhfavLZ9FwwhryNTUib9jSM3569Wo6reOQu3nYnhhoioi/ohuwDfZRegtEYLqQTQNOqx8VghVm4/79RdMhuPqlFV3wg/dwUGGHuutKRviCe8XQ1nN/2YfdkhtWWcLgIApPVt36iNSZ8Qw8GaW42vYyuCIGCfceTGFuHGv5sSYd6uEAQ45VRzZ2O4ISLqgk4VVuJgbhkkACYkBOMft/XFPYlhULlIkV9WhxUZF5xW2zeZlwAASVE+kEpa384sk0owzHh208e7cux+FlJ9gw77jQHi5l6W7ZL6o7hgDwCG0SZbTgPmFNfgapUGcqkEET5uNnnNG3lqiuGGiKiLqdPq8F1WAQBgeA9/jOgZAKWLDMnRvrhrYBgA4P2t55xytlB+aS325ZRCIvlfp9y2DI72hYtMglOFVeaRC3vZn1MKTaMewZ4qq442AAwjIn7GbdY7z161WW2m7z3cxw0uLUzjWSPe2JvnxOVKm7xeV8JwQ0TUxWw8Zpj28e+mwLg/TK/0C/NC7yAPaHV6/H3DUegdvP7m20OGUZubevjD201x3ftdFTLzzp4f7Dw1tf2MIYyM6Onf7ATw9jCN3vx+0nZTU7Zcb2MSH2qYQjtu58aDnRHDDRFRF1Jeq0VWXhkA4N5B4c1+y5dIJLhzYCjcFDLsv1iKzSdsv/C1NXq9YA439w4Kt/h5psWvvxxTo0Gnt0dpAIAdZw09daydkjLpHWxad3PVJuHR1uttTPoaw82F4hrUajvPTjpHYLghIupC9uWUQi8Asf7uiPJr+Y3Qx02BR4ZHAwA+2Z3rsNoO5pYhv7QO3ZRyjI+3vIdMjL87/LspUF7bYLemflcq63H6ShUkEsOoUkdE+7vBXSFDcbXGJj1v8kvroK6oh4tMgkhf26y3AYBADxUCPJQQBOBU4Y3VqZjhhoioi2jQ6XHgouE3/NTufm3e+9DQKEglwJ4LJThzxTFvbKbOv+Pjg+GqkFn8PJlUgon9QgAAPx1W26U205RU/zAv+Lhff7qsLXKpFCN6GkZ/tpzq+NTUXmOfn/7h3lDIbfu23Ne4u+tGW3fDcENE1Ap1RR0OXCzFL0fV+CG7wO67ea7nyKVy1Gp18HZzQZzxTas1od6uSOtrGD1Zu+ei3WsTBAHpJw3h5o/rgCxxxwBjx+LjhXbpeWOrKSmTW+IMJ4PbItzsu2AIrENbaHbYUX3N624YboiIbnjbThfhP1vO4busAuw4V4ynv8zG/PXZqNM6p3+MIAjYfd7wG/6wGL82t1ibzEiNAgBsOFSASjs3cjtzpRr5pXVQyKW4uVf7p32SIn0Q7KlClaYR287YbhcSYFgLtNM43WUacemoUXGG1zlyqQJFVfUdei1Th+ahsW2PxlnDtKj4hJrhhojohnbwYik2GTvZxga4Y1CkD+RSCX7Ivox7lu92+FlIgOE3b3VFPeRSCZKjfSx6TkqsH3oGdkOtVocNxt4z9vKb8Rypm3r4w00hb/fzpVIJbu9vmJr6+Yhtp6aOX65EaY0W3ZRyJEZ62+Q1Az1U5rOmMk5bH8YKyutwqawOMqkESVGW/bm2h2la6pS6Eo12XKzd2TDcEBFd48yVKnMPmZG9AjD7pljclxSOz2cPhX83BU6qK/H25tMOr2vjUcMbflywh8XhQSKR4KGhkQCA7+y8zdp0rIE1U1ImE4zrbracKrJph+Xtxn40Kd39bNZDBjAcCAoAWzqwJXy3cUQpIcwL3ZTtD4XXE+3nDjeFDJpGPXKMJ47fCBhuiIiMdHoBPx6+DAHAoEifJi36h8b64d0HEwEAn+3NxVEHdn0VBMEcbhKMowWWuq1/KKQS4HB+OXJL7PPmVlRZj8PG06fHGN/wrZEY4Y0gTyWqNY023TVlWkxsq/U2Jqbvdee5YmgbrRsVMU3BjezZsR1crZFKJeYjI26
kdTcMN0RERgdzS83TF3cOCG3W6C21hz8mDQyFXgD+7/ujDjug8lRhFS6W1EIulaB3kEe7nhvgocRw49bn/9p4usfkN+PIxcAIbwR6qqx+HalUYt5Cbtp51VHVmkZk5hr6At1s4wDRL8wL/t0MYcy0i609dHrBvNB5ZG/bBq9r3YjrbhhuiIhg2Ga91bjzZVTvgFa35L4wsQ88lHIcvlRhblhnb78YR216BXlA6WL5FmuTO/obdiLZ63BK03qbjkxJmdyaYAg36Seu2GSNyN7zJWjUC4jyc2u1L5C1pFIJRvW2fkt4dn45Kuoa4KmSm7s020PfkBuvUzHDDRE5hbZRj6r6BuidvL3aZN+FElTWN8Lb1QVDolvfkhvoqcKTt/QAAHy4/YJDtodvNI5iJIS1vf27NeMTguEik+D0lSqctnEztxpNo3kn0tg+HQ83Q6J94ePmgrLaBvMhlx1hWm8zwk7TPqYt4b+fvNLuvwvbzMdBBEBuw7VAf3TtGVPObmfgKAw3RORw32VdwqsbT2LxL6fw4g/HsCzjHEqqNU6rp1Gvx3bj9MAtcYHXfaOZOjQS3ZRynC2qNr+x28vZK1U4V1QNhUyKuGDrwo2XqwtG9jK8Cf902LajNzvOGtabRPq6oVeQdYdRXksuk5pHgH493vGpKXN/GxttAf+jm3sZRvkultS2e9rHvN7GxmuB/qhnUDfIpBKU1TZAXdGxbetdBcMNETlMg06Pf3x/DM+sPwytccpBLwCXyurw4Y4LKKp0zj+8p9RVqNY0wkMpR2Lk9bfjeqhccH+y4eykVTtz7Frbz8YpqRE9/aGyYkrK5M6Bhqmpn45ctulv7+nGXVJj+wR16DDKa01IMOya+vVYYYfObsotqUFOcQ3kUglSrtPR2VrdlHLc0tsQHNuzpqm0Rosjl8oB2He9DQCoXGTmU9BvlE7FDDdE5DBL0s/g072Gs45uiQvEoknxWDC2FwI9lKisb8TKHRdwtcrxIzimxaBJUT6QSS17g56ZGg2JxNDj5FxRtd1q++WoYfTCtE3aWmP7BMLVRYbcklocsdFOL51ewJZTtltvY5Laww8eSjmKqjTIMu7CsoZpofOQGF94qFxsVF1ztw8wHR1heXDccfYqBMGwtT+oA4uwLdX3BtsxxXBDRA5x/HIFVm6/AAB458GBGNsnCHKpFP4eSvxpRCzCvF1Rq9Xhewcfc1BaozWHk+Q21tr8UZSfu3mNyZrd9hm9OVdUjdNXqiCXSjCug+tZ3BRyjDUGEFtNTWXmlqGstgFeri4YbGFjQUso5TLc0scwGvLrMet3eP1mHFUaY4O1QG25Jc4QHC+V1eGwhcHRtBvM3qM2Jn3NO6ZujEXFDDdEZHc6vYCFGwxbpyf2C8akgWFNPu+mlGPq0Ei4yCTIKa7B0QLH/QN8MLcUAoAegd3g284DFR9JjQYA/JB12S7HMpje2If38IeXW8dHHu4wdgD+7xF1h6Z7TEy7pCxZp9Ret5q2hB8vtCrsVtQ2YL9xRG5sH+t771ji2uD4XwuCY1mNFr8bR5UmDQi7zt220fcG2w7OcENEdvfJ7os4cqkCnio5Xr4zvsV7fNwU5oWVG4+qbdqhtjU6vWDugTK4HaM2JsNi/RDu44oqTSM2n7BNX5ZrbTROSU3sF2yT1xvZOwAeKjkKK+ut6styLUEQmqy3sbWRvQOgcpEiv7TOqqmUjDNF0OkF9AzsZvMt4C25vR3B8acjl6HV6dE3xNMcOuwtPsSwYyq/tM4px4c4GsMNEdlVfYMOyzLOAQCen9AHgR6try8Y0TMAvu4KVNY3dui8HkudK6pGVX0j3BUy9AlpX3M8wNDn5L4kw8Lirw/atufNxeIanFBXQiaVYFxf24QbpVxmHhH5sYNTU2euVCOnuAYKudQuUytuCrk57G6yYteUaWRkrA3XArVlZK8AeCgNwXHb2bb/7n5jPOfL9HfHEbzcXBDm7QoAOHkDjN4w3BCRXW04VIDiai3
CvF3NO4xa4yKTYqKxidveCyWob7Dv6M1h426V/uHekEut++fw3kGG72nX+WIUlNfZqjT8YlyTkRLr1+7psrbcMSDU/PoNHWiS94txyuzmnv52ORMJ+F9Dv1/a2a24QafH1tPGcGPnKSkTlYsMDw6JAAAs33q+1ftOF1bhyKUKyKUSTDLuYHMUU6fiG2FRMcMNkQhV1TeguErj9IZdOr2Aj3YYFhE/elOMRYcWxoV4ItBDCU2jvsNTJ22p1Taat8UOjPC2+nUifN0wLNYXggCbnrxtCg8TO7hL6o9Su/vBz12B0hpth85vMi2INR2XYA+3xAXBRSbBuaLqdo02HMgpRVV9I3zdFRgYYfuTtlsze0QsFDIp9l8sbbUBoamr9ei4QPh1UzqsNuCadTcMN0TUlVyprMfXB/Px+q+nsOS3M3hr82lsPKq2+whIa9JPXMGF4hp4quR4cHCERc+RSiS4yXgW0q5zxWjUd7wFf2u1aXV6+LorEO7j2qHXuj/J8L19c+iSTQJlfqlhu7ZUAqTF23ZaRS6TmgOTtVNTF4trcKqwCjKpxC7rbUy8XF0wJs7w+usP5Fv8vG8PGU51T+sbZPHWflsI8lThPuPo5PtbzzX7/OXyOqzblwfAsVNSJqZOxTfCMQwMN0Qise3MVfxny1lk5ZdDLwAyiaEj6c5zxVi9KwcaJwScldsNw/PTU6Lg3o6pi4ER3uimlKOyvtFup2+bzlkaEO7d4eZzE/oFw11h6CFz0LhAuSNMoyJDY/zgb4ff7k0N/TYfv2JV8DV1Dk6J9YOPDafMWmKa6tlw6JJFtVbVN5hPUL8/2bJAbUtzbu4OqcTw/2PmNX8XBEHAC98dRbWmEYMive0aCltjGrk5V1TtkAX7zuT0cLNs2TLExMRApVIhKSkJO3bsaPVetVqNqVOnonfv3pBKpZg/f77jCqUbXrWmEYdyy/DtoUv4dG8uTqo7zzktZ65U4cnPD0EvGLY0zx3VHf+4vS+mDImEq4sM+WV1+GRPLrSN9hkFaclJdSUO5ZXDRSbBw8Yt05aSy6RINXaU3Xmu2OY/59Iarbn1/YAIrw6/nptCbh4N+fqg5SMMrdlonpKyz5RPUqQPQrxUqNY0IuN0+w98NE9JJdhvSspkRM8AhHm7orK+0TxV15b/HlGjrkGH7gHuGBTpbff6/ijSz83c6mD2JwfMU0DfZxdg6+mrUMikeOO+/g4dUTIJ9VLB280FjXoBZ6/Yr/FkZ+DUcLN+/XrMnz8fL7zwArKysjBixAhMmDABeXl5Ld6v0WgQEBCAF154AQMGDHBwtXQju1RWi3+nn8E3hy4hM7cMJ9WV+HRvLpZlnEehk89qKanW4NE1B1ClaUS0nxtmDItCuI8bFHIp+oV54ZHh0VDKpbhYUoPvswscVpdpGmFsn6A2d0i1ZkiML1xkEqgr6pv8BmwLG4+q0agXEOqtsqq2lphGCX4+okatttHq17lcXoesvHJIJPZbzyKVSswLi3863L4meZfKapGdb6zPATuRZFIJHjD+bL/Yf/3g+JUxXE4eHGGz4yDa6+U74zEg3AtltQ146KO9mPzBHvztmyMAgD+P6YEege3fmWcLEonE3KlY7OtunBpulixZglmzZmH27Nno06cPli5dioiICCxfvrzF+6Ojo/HOO+9gxowZ8PKy7LctjUaDysrKJg+i9sgtqcGqnTmoa9DBv5sSN/cMwE09/OEik6CgvA5rduc4tW/Ev/57ApfK6hDt54ZpQ6OaNVML93HDwynRkADIzi+361EBJppGnTlIPWDhWps/clPIMSDcGwDMRzbYyg/G2kyvbwuDo30Q6euGGq3OPLJhDdPOoMFRvgi0Y1v+O/obws1vJ6+gWmN5GPvKGFpTYv3sWt+1HhgcDqkE2J9T2ubf37NXqpCVVw6ZVIK7Ex2/psXEy9UFa2cNxYAIb5TVNmBfTikadAJG9PTH4yO7O60u4NodU+Jed+O0cKPVapGZmYm0tLQ
m19PS0rB7926bfZ3FixfDy8vL/IiIcPwcLHVdVyrrsXrXRWga9Yj2c8e8Ud1xa0IwJvYLwbNpvc1nIn2692KHflu3VnZ+Ob43rh15d0oi3FpZ1xLt745hsYZpnh+yCzq0BdgS6SeuoLy2AcGeqg6dxjzUWPPGo2oU2+jU8EtltThwsQwSiWELuK1IJLbpefOLcb3IBDtNSZkkhHkixt8dmkY9Nlp44GOjTo+vjN/bg0Mi7VleEyFerrglzrCl+9+/nWn1vk/2XARg6Jgc4OHYnUh/5OXqgk9nDcGfx/TE/7srAVufHYW1jw6xaMegPd0onYqd9lMuLi6GTqdDUFDTYc2goCAUFtqu0+fChQtRUVFhfuTnd3w+nG4Mer2A77MKoNXpEePvjpmp0VBecyqzh8oFM1Ki4aaQ4XJ5PZ779qhD6xMEAYt+Og7A0Gvlem/U4/oGwUMlR0mNFtvP2LdBnmlK6r6k8A6tLQjzdkWEjysadEK7dsu0xbRDaFiMH7xcbXuY4j2DwiCRAHsulCC/tLbdz88tqcHBXEPwutXO61kkEgkmG0fVPt6VY9G6pm1nrqKwsh4+bi4Yb+NdXNezYFxvSCWGab8950uaff5YQYV5J9Ijw6MdWltrPFUuWDCuF6YNi0KMv7vTpsmuZdoxdeJypU2O4OisnL6g+I9/2IIg2PQvgFKphKenZ5MHkSW+PJCP3NJaKGRS3J8UDoW8+f8uvu4KTB8WBanEcBjhzrPW9w1pr5+OqHEorxyuLjL87dbe171f5SLDbcZFr9vPXrXb6duXymqx09g/5QEb7FYxjd6s25cHnQ3+MTbtkror0fYN1MJ93DC8u2Eb+7r9La8dbIupc+1NPfwR4tWx7emWmDI4Em4KGU4VVpn/zNpiWvNyz6BwKOWy69xtW31DPfHQ0CgAwD9/Oo7Ga0YfdXrDTiS9YGhSmGr8M6DmYv3doZBLUaPVIc+KAN5VOC3c+Pv7QyaTNRulKSoqajaaQ+RoRVX1WPzLSQCGEQ9vt9a3u0b5uZvfgP/4j6696PQC/p1uGJ5/YlR3BFm49qFfmJd5JMS0TdvWvsm8BEEwrMmI9HPr8Ov1C/OCt5sLCsrrrNrZc61ThZU4VVgFhUyKWxNs2xzPZNowwxvwl/vz2rXNWqcXzOHGFqHQEl5uLuav9dGOtk82v1JZb+76O2WIc6b3F4zrBW83F5wqrMKbm06jUaeHIAj4eGcODl+qgIdSjn/c1scptXUVcpkUccGGBc1i7lTstHCjUCiQlJSE9PT0JtfT09ORmprqpKqIDP6dfhZV9Y0I83ZFinFLclvGxgXBx80FZ4uq8ZmNF7+25NdjhcgproG3mwtm3RRj8fMkEgluMTZF+3Rvrs3WsZjo9YJ5vclkKxcS/5GLTGp+A+7owuLvswyjNqPjAmw+JWUytk8gQr1UKKttwH8tXMsCGBoWqivq4eXqgnEOOg8JAB4dHgOJsS/LmStVrd63POM8dHoByVE+Ttvt4+OuwN/GxwEAPth+AXct24V7lu/GKxsNv4g8O763wxY5d2Xx5nU34l1U7NRpqQULFuCjjz7Cxx9/jJMnT+KZZ55BXl4e5syZA8CwXmbGjBlNnpOdnY3s7GxUV1fj6tWryM7OxokTJ5xRPolUQXkdvsk0DL/f1i8EUgumSV0VMvwlzTA1tCT9DCpq7bd7ShAErNhmGHWZkRLdruZ4ANArqBvCfVxR36DHyu0XbFrb7vMlKCivg4dKbtM1Iw8NNSxe3XbmKnJLaqx6jQadHhuMre/vMvYhsQe5TIqHjKM3nxoXuFrCtIV50sBQqFwcN+UT6eeG8caDOd/cdLrFtTenCivNwfLpsT0dVltLpgyJwFv3D4CnSo5jBZXIyiuHUi7FnJHdzaNm1DbTdnCO3NjJ5MmTsXTpUixatAgDBw7E9u3bsXHjRkRFGf6CqtXqZj1vEhMTkZiYiMzMTKxbtw6JiYmYOHGiM8onkVqRcR4NOgGp3f0
Q7e9u8fOmDIlE7yAPVNY3YvXutof4O2LXuRIcLaiAykWKme1sjgeYRm8MO08+3ZOLEhuO3qy30xt0lJ87RvYKgCDAvGi0vbacKkJRlQb+3RQYY+fusA8OjoBCJsXhSxXIzi+/7v2lNVpsPn4FgOOmpK719NiecJFJkH7iijlkmQiCgJd+OA6dXsCEhGCM6MDuN1sw7Ur7bcFITBkSgcdHxmLHc6Px/IQ4pzTG64r6XrOoWKycvqB47ty5uHjxIjQaDTIzM3HzzTebP7dmzRpkZGQ0uV8QhGaPixcvOrZosrnThZXYcOgS1u3Pw+f7cnHisnO6/xZW1Jt35fx5TPt+Q5VJJXhqTA8AwMc7c1BVb5/Rm+XbDGfWPDg40urTonsHeaBfmBfqGnT4ZPdFm9RVXqvFJmNb/snJtt8mPN34W/lXB/OtOjLgi/2mM30iWlwcbkt+3ZS4fYBhTc97W85e9/4Ptp2HVqdHQpinecrAkfqEeJpHHv/50wnkFP9vdOyzfXnYl1MKlYsUL3Si9SyBniosvqc/Fk7oY7NGjDeKuGAPSCRAUZXGbhsLnM3p4YZubHVaHb4+mI9P9uTiYG4ZjhVU4PjlSny2LxfTVu1zSMO5a60wvskMjfE194VpjwkJIege4I7K+kas3WP7tTfHCiqw61wJZFIJZo+wfK3NH0kkEjwxytBM7JM9uahpRxO31nyfVQBtox59QjyREGb7N+jRcYEI83Zt91oWwLCDy3TcgqUHeHbUvNE9IJdK8NvJojZP3y6sqMcaY8BcMK6X07YLPzYiFsNifVGr1eH+FXvw9ubTmLfuEP7x/TEAwNxRPRDu0/EF4uR87ko5Yoyj0mLtd8NwQ05TrWnE+xnnkJVfDgmAoTG+uKN/CEb08IdcKsGucyW4f8VunL/qmIBTVqPFlwcMv90/dYt16wpkUon5uR/tuGCT0HCtj3caprsm9gvp8BvN+PhgxPi7o6KuwTyqYS1BEPCZcbroQTu1vZdJJXhomGFEaMW28+3aFv7VgXwIAjC8R/umGjuie0A38xqQ//fzyVbr/c+Ws9A06pEc5YPRvQMdUltLZFIJljwwEOE+riiu1uA/W87h5yNq49/pHpg3uofTaiPb+9+6G3EuKma4IafQ6QV8sT8PpTVaeLu54E83x2LSwDCkdPfHhH4hmD+2F/qFGc5mmbFqP4oq7X9+07r9eahv0CM+1BPDe7R/1Mbk9v4hiPZzQ1ltAz7fZ7vRm6LKevx0xLDbpz07pFojk0rwp5tjAQCrduZ06FDNfca2+G4KGe4eZL/FutOHRcFTJce5omqLDlEEgPoGHb40TjVOcWBXXQB4ekxPeKrkOKmubPFAzQtXq83ToH8d39vpTd5CvV2x5S+j8P7UQRjR0x9JUT74fu5w/CWtN9eziEy8yNfdMNyQU2w8qkZOcQ2UcilmpkQjyq/pb9O+7gqsfmQwov3cUFBeh5mrD1i1zsJSmkadeWrgsRGxHXqTkcukmGv8LXfl9guo09qm7rV7ctGgE5AU5YOBEd42ec27E8MQ4KGEuqK+Q4dqmnbSTBoYBk+VfbZYA4au0I8ag917W85Z1GH18315KKrSIMRLhbS+9j/F+lo+7grz2q2XfzpunhoDYP573agXMLJXgLlXkrMp5FLc1j8En84aim+fSEW/8I6fmk6dj/kYBoYbIts4qa7EnguG9un3J0W02pfCv5sSax8dCj93BU6oK/HO79dfmGmtH7Mv42qVBsGeKtzWv+PN3e5ODDMO72s7POUDGEYfTKNAthi1MVG5yDDb+Hrvbz1nVQPCoqp6bDIe9jhtmP1HRh5JjYGHUo5ThVXYfOJKm/fWaBqxbKthAfafx/S0+0LiljycGo1b4gJR36DH7E8O4KMdF/Bd1iVMWbkXeaW1iPR1w2v39nN4XXRjM01L5ZTU2Hz6vDNguCGHatDp8V/j1MqInv7m3x5aE+nnhsX3GP7h/2DbeRy2YFttewmCgFXGtSw
zh0fb5GA7F5kUc0cZRm8+2H6+w6NOX2deQlltA8K8XZFm4wZv01Oi4OuuQG5JrfkQzvb46kA+GvUCBkV6m4e67cnLzQUPG7fAv/HrqTZHxtbsvoiSGi2i/NzMh1o6motMihXTknBbvxA06AT8v59P4pn1h83B5ss/DXPIUQtE1wrwUCLQQwlBMPQxEhuGG3KobWeuoqy2AV6uLhgTZ9mbdFp8MCYNDIVeAJ79+rDNp6d2nivGqcIquClkmDLYdiMP9yaFIcRLhSuVmhbXW1iqUac3H5Xw2IgYyG18qrCbQm5ee/PelrPtGr2p1Taap/Mc2UDtsRGxCPRQ4kJxjfmYjD+6WqUxNztcMK6XU09jVsileHdKIuaP7Ylhsb4Y3sMP9ySG4cs/DUOoN4MNOUe8iKemGG7IYfJKas2nUU/sF9KuKYKX74iHfzcFzhZV27yr7ofGM3UeSI6Al5vt1oso5TLMGWnYbr0847zVC3Z/PqpGfmkdfN0VmGzD8HWt6cMMozcXS2rxQztGbz7bm4viai0ifF1xxwDbH0TZGi83F7x1/wAAhrVIfzxzqlbbiFmfHEBVfSPigj1wR3/H1dYamVSC+WN74cs/peDz2cOwZPJABhtyKtPIuRg7FTPckMO8sekUGvUCuge4I6Gdjcp83BX4x+19AQDLMs7hUpltTrM9XViF7WeuQioxnLFja5MHRyDQQ4nLFfXm1v/tIQgClmcYRh9mpkbDVWGftvzuSjkeG2EYvXl782mL5uBrNI1Ysc0QNJ+6pafDR0Zu7hVg7tD89JfZ+CG7AIIgoE6rw1PrsnDkUgV83Fyw7KFBkHKnD1Ez5h1TIux1w3BDDnGqsBI/HzVs3Z3YL8Sq3Uh3DgjFkBhf1Dfo8erGlqci2mvVTsOb8/j4YJucYP1HKheZecrn/YxzaGjngt2MM1dxqrAK7goZZqTYd9pnZmo0InxdcbmiHkt/O3Pd+z/ZcxGlxvUs9yTab/t3W567NQ4Dwr1QUdeAp7/Mxi1vb0P/f27C76eKoJRL8dHDyYgN6OaU2og6O9Oi4lOFVe3+t6mzY7ghh3jnt7MQBCAhzMvqxZMSiQT/vDMeUgmw8Whhm11fLVFUVW8+JXq2cdTCHh4aGgU/dwXyS+vaNeWj0wt449fTAICpQyPh7WbdUQuWclXIsOjOBADAx7sutjkPr66owwfGUZunx/S0+TogS7kqZPh6Tir+Mq4XFDIpcopr0KATEObtimUPDUJSlK9T6iLqCiJ93dBNKYe2UY8LV607kLazYrghuzt+uQK/HCuERAKMietYB9Y+IZ7mM4Ze+vF4h37bWLntArQ6PQZFeiMpyqdDdbXFVSEzh6f/bDlr8YLorw/m46S6Ep4qOZ4Y5ZjusKPjAjGxXzB0egHPbziCWm3z6akGnR5PrstCRV0DEsI8cacD19q0RCGX4qkxPfHbgpF4b2oiMp4dhZ3Pjbb74ZhEXZ1UKkGfEA8AwAm1uDoVM9yQ3S39zdCf5o7+oQhqpadNeywY1xu+7gqcK6q2+tDHosp6c+O59h6QaY3pKVEI9FAit6QW71rQr6eqvgFvbTZMDf15TE+rD8i0xou3x8NDJceRSxWY/cnBZlutX//lFDJzy+ChkmPZ1CSnjdr8UaSfG27vH4pof3end/ol6ipM624O5zPcEFns6KUKpJ+4AqnEdiHCy80FfxtvOMH4nd/OWnWq7bKM89A0GkZtRvYKsEldbemmlOP/3WWY8vlg+wUcK2j7H5L3tpxDcbUGMf7umJESbff6rhXspcKaR4bAXSHD7vMleGTNfvx+8gqOXqrAn9YexEfGnkBv3T/ALuuUiMhxBhlHrTNzy5xciW0x3JBd/du4MPWugWHoEWi7hZ0PJEegf7gXqjSNrfY5ac3l8jqsMx7y+Jc0x53nkxYfjNv6hUCnF/Dct0danZ765agaHxi3u/99Yh+ndNVNivLBmkeHwE0hw94LpZj1yUHc8d5ObDYG1b+M64X
x8Y49yoCIbM80JX9CXSmqTsUMNzeARp0exwoqoK6oQ0m1BnrB8tOUOyIrrwxbThUZThW28dSPVGpYXCyRABsOFeC367Thv9abm05Dq9NjaIwvUrs79jyfl++Mh7ebC45frsSsTw40+8fk6KUKPPNVNgDD7qVxNu5G3B6Do33xzZxUTB0aiUhfwwhNWt8gbH7mZpv/eRKRc4R5uyLESwWdXrBLB3hnkTu7ALKfshotPt+Xi8/35UFd8b9TtX3cXHBTzwAkRfrYdVTAtNbm7sQwxPi7X+fu9kuM9MFjI2KxcvsFPL/hCH6NvBn+3ZRtPue3E1fwXVYBpBLg+QlxDl+bEeChxIppSZi15gB2nSvBjI/349m03ugd7IFvMvOxLOM86hv0GNkrAP93Wx+H1taSvqGeePVuw/EXDTq9U7v8EpF9JEX54L9H1DiYW4bUHv7OLscm+C+VSJ24XIkJ7+zAW5vPQF1RDw+lHN2UcsilEpTVNuCnw5ex9LczKCivs8vX359Tim1nrkImleDPt9jvt/wF43qhd5AHiqu1eP7boxDaGJUqr9Vi4XdHARi2fidG2m+HVFuGxfrhs9lD4amSIzO3DFM+3ItB/0rHqxtPoby2AfGhnvjP1MROs1DXhMGGSJySRbjuhv9aiVDG6SLcv2I3CivrEevvjn9PHoCD/xiLv0/sg/+7rS/u6B8CL1cXlNc1YOX28zh8qdymX18QBLxmXAczeXCEXRedqlxk+PfkgXCRSfDbySv4xw/HWgw4DTo9nv/2KK5WaRAb4I4F43rZrSZLJEb64JsnUnHngFAEehhGmyJ93fDGff3x/bzh8FTZ7hgIIqK2JEcb+kEdyiuDXu+YZQv2xmkpkTlwsRSPrT2IBp2A1O5+WD4tCV6u/3ujVMilSOnuj4ERPlh/MA9nrlRj/YF81Gl1GBZrm/Un6Seu4FBeOVQuUjztgLUZfUM98do9/fHsN4fx2d48SCUSvHRHPGTGlvu12kbM/fwQMk4bRpLevG8AVC72OcagPXoFeeDdKYkQBAElNVr4uCnMNRMROUpcsAfcFDJU1TfiTFEV4oLbdzxOZ8RwIyLqijo88VkmGnQCbo0PxrtTEltdU+OqkGFGSjR+OarGrvMl+PHwZcilEnOCt5ZOL+DNTYauuo8Oj7FJXxtL3JsUDp1g2IW0dk8utpwqwoODI1Ct0SH9RCHOX62BykWK96cOsmvDPmtIJJLrrhUiIrIXuUyKgRHe2H2+BAcvljHcUOdR36DDnE8zUVytRVywB5ZMHnDdxcJSiQQT+4UAAHadL8F3WQVwkUsxINzb6jq+2J+Hs0XV8HJ1wePGE7Ed5YHkCLjIJPjnTydwqazO3AQPALzdXLDq4cGdLtgQEXUGyVE+2H2+BJm5ZZg2zL7n2DkCw41I/Dv9DA5fqoC3mws+nJEMN4Vlf7QSY8Bp0AvYn1OKbw5egpuVJ08XVtTj9V9OAQDmj+3ZZDrMUe5ODMeEhBD8kF2AX44VIthTheRoX4zqHcDRESKiVgyOMYza771QAkEQunyXb4YbEcjOL8eHOwxN3966bwAifNu3gFcikeDOAaGo0+pwtKACn+/Lwz2J4egX7tWu13npx2Oo0jRiQIS3w7vqXkvlIsPkwZGYPDjSaTUQEXUlg6N9oZBLoa6ox/mr1egR6OHskjqEu6W6OE2jDn/75jD0AnDXwFCMtbLpm1Qiwf1J4YgNcIe2UY+HV+/H6cIqi5+/8agam45fgVwqwev39uPCWCKiLkTlIsNQ4+jN9jPFTq6m4xhuurjlGedx5ko1/Lsp8NId8R16LblMimlDoxDm7YrSGi2mfrgXZ69cP+AcvVSBZ78+DAB4fGSsKBajERHdaEb0NDTw23H2qpMr6TiGmy7sUlktlmecB2Bo6+9jg5OjVS4yPDo8BvGhniip0eLBlXux+1zrKT6/tBaPrDmAWq0OI3r64+kxzu0fQ0RE1rmph+EQ4b0XSqFpbPnsu66C4aY
Le3XjSWga9UiJ9cNtxl1PtuCqkOGzWUOREGYIONNW7cO7v59FnfZ/f9kFQcCvxwpx34rdKK7WoE+IJ5Y9NMgphzwSEVHHxQV7wL+bEnUNOhzKLXd2OR3CBcVd1O7zxdh4tBBSCfDSnX1tvrLdx12Bb+ak4qUfjmP9wXwsST+DD7dfwPiEYCjkUpy7Uo39F0sBALH+7ljzyGB4sKsuEVGXJZVKMKKnP77LKsCOs1eR4uCDhW2Jv2Z3QY06PRb9dAIAMG1YlN3WuKhcZHj9vv54+/4BCPdxRZWmEd9kXsK6fXnYf7EUcqkET47ugY1Pj3BYsz4iIrKf/6276dqLijly0wWt25+HU4VV8HZzccgZSfcmhePuxDDsyynF1tNFULnIEOSpRGp3f7uc9k1ERM5xkzHcHLtcgSuV9V32F1eGmy6mrEaLt42dd/8yrhe83Tq+iNgSUqkEKd39uvQwJRERtS3QQ4VBkd44lFeOX46qMXN4jLNLsgqnpbqYJelnUFHXgLhgD0wZwiZ1RERkW7f1DwUA/HxU7eRKrMdw04WcVFfi8325AICX7oiHXMY/PiIisi3T7tsDF8tQWFHv5Gqsw3fHLkIQBPzzp+PQC8DEfsGcHiIiIrsI9lJhcLThkOGuOnrDcNNF/HKsEHsvlEIpl+LvE/s4uxwiIhIx0+jNz0cuO7kS6zDcdAH1DTq88vNJAMDjI7sj3Kd9B2MSERG1x8R+IZBIgEN55Sgor3N2Oe3GcNMFvL/1HArK6xDqpcITI7s7uxwiIhK5QE8VhsUYlj98uT/PydW0H8NNJ3e6sMp8ftQ/bu8LV4XMyRUREdGNYHpKFABg3b481Dd0rbOmGG46MZ1ewPMbjqBRL2Bc3yDcmhDs7JKIiOgGkdY3CKFeKpTUaPHj4a619obhphP7dM9FZOWVo5tSjn9NSrD5+VFEREStkcukmJ4SDQBYvesiBEFwbkHtwHDTSZ25UoXFv5wCADx3a28Ee3XNFthERNR1TRkSAZWLFCfVldiXU+rscizGcNMJ1Tfo8OcvsqBp1GNkrwA8NDTK2SUREdENyNtNgbsTwwEA/04/02VGbxhuOqFXfj6JU4VV8O+mwFv3D4BUyukoIiJyjnmju0Mpl2JfTil+PVbo7HIswnDTyXy2Nxef7jUcsfDW/QMQ4KF0ckVERHQjC/dxw+M3xwIAXtl4skvsnGK46US2nbmKl348DgBYMK4XRvUOdHJFREREwJxR3RHsqcKlsjqs3H7B2eVcF8NNJ5GZW4p5nx+CTi/gnkFheOqWHs4uiYiICADgppBj4cQ4AMC7v5/FgYude3Exw00nsPNsMaZ9tB/VmkakxPrhtXv6c9s3ERF1KncOCMXt/UPQqBfwxGeHoK7ovMcyMNw42TeZl/DomgOoa9BhRE9/fDxzMBRy/rEQEVHnIpFI8MZ9/REX7IHiag0e/zQTFbUNzi6rRXwXdZJqTSMWrM/Gs18fhlanx63xwfjo4WQer0BERJ2Wm0KOldOT4e3mgiOXKnDvit24VFbr7LKaYbhxMJ1ewJf783DLWxnYkFUAqQT4y7heeP+hQVDKGWyIiKhzi/RzwxePDUOwpwrniqpx97Ld2HS8sFP1wHF6uFm2bBliYmKgUqmQlJSEHTt2tHn/tm3bkJSUBJVKhdjYWKxYscJBlXZMSbUGH26/gLFLtuH5DUdRVKVBhK8r1j+egqfG9ISMvWyIiKiL6BPiie/mpSIu2ANXqwxTVDM+3o+DF0s7RciRO/OLr1+/HvPnz8eyZcswfPhwfPDBB5gwYQJOnDiByMjIZvfn5ORg4sSJeOyxx/DZZ59h165dmDt3LgICAnDvvfc64TtoXXG1BicuVyI7vxw7zl7Fobxy6PSGP3AvVxc8dUsPTE+J4mgNERF1SSFertgwNxXvbz2HD7fnYMfZYuw4W4xYf3fcnxyBh1Oj4KZwTsxwarhZsmQJZs2ahdmzZwMAli5dik2bNmH58uVYvHhxs/t
XrFiByMhILF26FADQp08fHDx4EG+99ZbTw426og7/+P4YLpXVoaC8DlX1jc3u6RfmhalDI3HHgFB0Uzr1R09ERNRhbgo5/jo+Dg8kR+C9Lefw3yNqXCiuwYc7LmDWTTFOq8tp77BarRaZmZl4/vnnm1xPS0vD7t27W3zOnj17kJaW1uTa+PHjsWrVKjQ0NMDFxaXZczQaDTQajfnjiooKAEBlZWVHv4WmX6dWi83ZF80fSyRAlK8begd7YEisL4bH+iPc1w0AoNfUolLTygvZUW1NlcX32vrn05lqaA/Waz+dodbOUEN7sF776gz1doYarOHjAvxjfAyeGRWBTcfUaNDpUV9bjXobfg3T92vRtJfgJAUFBQIAYdeuXU2uv/LKK0KvXr1afE7Pnj2FV155pcm1Xbt2CQCEy5cvt/icl156SQDABx988MEHH3yI4JGfn3/djOH0uZE/NqsTBKHNBnYt3d/SdZOFCxdiwYIF5o/1ej1KS0vh5+d3QzXKq6ysREREBPLz8+Hp6enscro0/ixthz9L2+HP0jb4c7QdW/8sBUFAVVUVQkNDr3uv08KNv78/ZDIZCgubnjBaVFSEoKCgFp8THBzc4v1yuRx+fn4tPkepVEKpbHr4pLe3t/WFd3Genp78H9ZG+LO0Hf4sbYc/S9vgz9F2bPmz9PLysug+p20FVygUSEpKQnp6epPr6enpSE1NbfE5KSkpze7fvHkzkpOTW1xvQ0RERDcep/a5WbBgAT766CN8/PHHOHnyJJ555hnk5eVhzpw5AAxTSjNmzDDfP2fOHOTm5mLBggU4efIkPv74Y6xatQrPPvuss74FIiIi6mScuuZm8uTJKCkpwaJFi6BWq5GQkICNGzciKioKAKBWq5GXl2e+PyYmBhs3bsQzzzyD999/H6GhoXj33Xedvg28K1AqlXjppZeaTdFR+/FnaTv8WdoOf5a2wZ+j7TjzZykRhE7QSpCIiIjIRpx+/AIRERGRLTHcEBERkagw3BAREZGoMNwQERGRqDDc3ACWLVuGmJgYqFQqJCUlYceOHc4uqctZvHgxBg8eDA8PDwQGBuKuu+7C6dOnnV2WKCxevBgSiQTz5893dildUkFBAaZNmwY/Pz+4ublh4MCByMzMdHZZXU5jYyP+7//+DzExMXB1dUVsbCwWLVoEvV7v7NI6ve3bt+OOO+5AaGgoJBIJvv/++yafFwQBL7/8MkJDQ+Hq6opRo0bh+PHjdq2J4Ubk1q9fj/nz5+OFF15AVlYWRowYgQkTJjTZYk/Xt23bNsybNw979+5Feno6GhsbkZaWhpqaGmeX1qUdOHAAK1euRP/+/Z1dSpdUVlaG4cOHw8XFBb/88gtOnDiBt99++4buwm6t119/HStWrMB7772HkydP4o033sCbb76J//znP84urdOrqanBgAED8N5777X4+TfeeANLlizBe++9hwMHDiA4OBjjxo1DVZXlh4S223VPn6IubciQIcKcOXOaXIuLixOef/55J1UkDkVFRQIAYdu2bc4upcuqqqoSevbsKaSnpwsjR44Unn76aWeX1OU899xzwk033eTsMkThtttuEx599NEm1+655x5h2rRpTqqoawIgfPfdd+aP9Xq9EBwcLLz22mvma/X19YKXl5ewYsUKu9XBkRsR02q1yMzMRFpaWpPraWlp2L17t5OqEoeKigoAgK+vr5Mr6brmzZuH2267DWPHjnV2KV3Wjz/+iOTkZNx///0IDAxEYmIiPvzwQ2eX1SXddNNN+P3333HmzBkAwOHDh7Fz505MnDjRyZV1bTk5OSgsLGzyPqRUKjFy5Ei7vg85/VRwsp/i4mLodLpmB5EGBQU1O4CULCcIAhYsWICbbroJCQkJzi6nS/ryyy9x6NAhHDhwwNmldGkXLlzA8uXLsWDBAvz973/H/v378ec//xlKpbLJ0TV0fc899xwqKioQFxcHmUwGnU6HV155BVOmTHF2aV2a6b2mpfeh3Nx
cu31dhpsbgEQiafKxIAjNrpHlnnzySRw5cgQ7d+50dildUn5+Pp5++mls3rwZKpXK2eV0aXq9HsnJyXj11VcBAImJiTh+/DiWL1/OcNNO69evx2effYZ169YhPj4e2dnZmD9/PkJDQ/Hwww87u7wuz9HvQww3Iubv7w+ZTNZslKaoqKhZiibLPPXUU/jxxx+xfft2hIeHO7ucLikzMxNFRUVISkoyX9PpdNi+fTvee+89aDQayGQyJ1bYdYSEhKBv375NrvXp0wfffvutkyrquv7617/i+eefx4MPPggA6NevH3Jzc7F48WKGmw4IDg4GYBjBCQkJMV+39/sQ19yImEKhQFJSEtLT05tcT09PR2pqqpOq6poEQcCTTz6JDRs2YMuWLYiJiXF2SV3WmDFjcPToUWRnZ5sfycnJeOihh5Cdnc1g0w7Dhw9v1pLgzJkz5sOHyXK1tbWQSpu+JcpkMm4F76CYmBgEBwc3eR/SarXYtm2bXd+HOHIjcgsWLMD06dORnJyMlJQUrFy5Enl5eZgzZ46zS+tS5s2bh3Xr1uGHH36Ah4eHeTTMy8sLrq6uTq6ua/Hw8Gi2Vsnd3R1+fn5cw9ROzzzzDFJTU/Hqq6/igQcewP79+7Fy5UqsXLnS2aV1OXfccQdeeeUVREZGIj4+HllZWViyZAkeffRRZ5fW6VVXV+PcuXPmj3NycpCdnQ1fX19ERkZi/vz5ePXVV9GzZ0/07NkTr776Ktzc3DB16lT7FWW3fVjUabz//vtCVFSUoFAohEGDBnH7shUAtPhYvXq1s0sTBW4Ft95PP/0kJCQkCEqlUoiLixNWrlzp7JK6pMrKSuHpp58WIiMjBZVKJcTGxgovvPCCoNFonF1ap7d169YW/318+OGHBUEwbAd/6aWXhODgYEGpVAo333yzcPToUbvWJBEEQbBfdCIiIiJyLK65ISIiIlFhuCEiIiJRYbghIiIiUWG4ISIiIlFhuCEiIiJRYbghIiIiUWG4ISIiIlFhuCEiIiJRYbghIiIiUWG4ISKbWrFiBTw8PNDY2Gi+Vl1dDRcXF4wYMaLJvTt27IBEIsGZM2cQHR0NiUTS7PHaa681+xppaWmQyWTYu3dvs8/NnDkTd911l/njixcvtvi61z5efvll833Z2dnNXnPUqFGYP3+++eMNGzZg/Pjx8Pf3b/U5ROQ8PDiTiGxq9OjRqK6uxsGDBzFs2DAAhhATHByMAwcOoLa2Fm5ubgCAjIwMhIaGolevXgCARYsW4bHHHmvyeh4eHk0+zsvLw549e/Dkk09i1apV5q/RmoiICKjVavPHb731Fn799Vf89ttv5mvdunVDcXGxxd9jTU0Nhg8fjvvvv79ZvUTkfAw3RGRTvXv3RmhoKDIyMszBIyMjA5MmTcLWrVuxe/dujB071nx99OjR5ud6eHggODi4zddfvXo1br/9djzxxBMYMmQIli5dCnd391bvl8lkTV6zW7dukMvlzb5Oe8LN9OnTARhGhYio8+G0FBHZ3KhRo7B161bzx1u3bsWoUaMwcuRI83WtVos9e/Y0CTfXIwgCVq9ejWnTpiEuLg69evXCV199ZfP6iahrY7ghIpsbNWoUdu3ahcbGRlRVVSErKws333wzRo4ciYyMDADA3r17UVdX1yTcPPfcc+jWrVuTh+l+APjtt99QW1uL8ePHAwCmTZuGVatW2bT21NTUZjXs2LHDpl+DiOyL01JEZHOjR49GTU0NDhw4gLKyMvTq1QuBgYEYOXIkpk+fjpqaGmRkZCAyMhKxsbHm5/31r3/FzJkzm7xWWFiY+b9XrVqFyZMnQy43/NM1ZcoU/PWvf8Xp06fRu3dvm9S+fv169OnTp8m1hx56yCavTUSOwXBDRDbXo0cPhIeHY+vWrSgrK8PIkSMBAMHBwYiJicGuXbuwdetW3HLLLU2e5+/vjx49erT4mqWlpfj+++/R0NCA5cuXm6/rdDp8/PHHeP31121Se0RERLMaXF1
dbfLaROQYnJYiIrsYPXo0MjIykJGRgVGjRpmvjxw5Eps2bcLevXvbtd7m888/R3h4OA4fPozs7GzzY+nSpfjkk0+abD0nohsbR26IyC5Gjx6NefPmoaGhwTxyAxjCzRNPPIH6+vpm4aaqqgqFhYVNrrm5ucHT0xOrVq3Cfffdh4SEhCafj4qKwnPPPYeff/4ZkyZNAgBUVFQ06z3j6+uLyMhIm3xvpaWlyMvLw+XLlwEAp0+fBmAYmbrebi8isj+O3BCRXYwePRp1dXXo0aMHgoKCzNdHjhyJqqoqdO/eHREREU2e8+KLLyIkJKTJ429/+xsyMzNx+PBh3Hvvvc2+joeHB9LS0posLM7IyEBiYmKTx4svvmiz7+3HH39EYmIibrvtNgDAgw8+iMTERKxYscJmX4OIrCcRBEFwdhFEREREtsKRGyIiIhIVhhsiIiISFYYbIiIiEhWGGyIiIhIVhhsiIiISFYYbIiIiEhWGGyIiIhIVhhsiIiISFYYbIiIiEhWGGyIiIhIVhhsiIiISlf8PJ6m19vxptXwAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(data['WEALTH1'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "id": "6de4ca6f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Proportion of each unique value in WEALTH1:\n", + "WEALTH1\n", + "9.0 0.149660\n", + "8.0 0.133964\n", + "7.0 0.122335\n", + "6.0 0.114970\n", + "5.0 0.104190\n", + "4.0 0.094930\n", + "3.0 0.083577\n", + "2.0 0.080615\n", + "1.0 0.068157\n", + "0.0 0.047603\n", + "Name: proportion, dtype: float64\n", + "\n", + "Filled missing values in WEALTH1:\n", + "WEALTH1\n", + "9.0 0.148688\n", + "8.0 0.133249\n", + "7.0 0.121505\n", + "6.0 0.114788\n", + "5.0 0.104628\n", + "4.0 0.095476\n", + "3.0 0.084488\n", + "2.0 0.081035\n", + "1.0 0.068052\n", + "0.0 0.048090\n", + "Name: proportion, dtype: float64\n", + "Null values: 0\n" + ] + } + ], + "source": [ + "# PROPORTIONAL IMPUTATION TECHNIQUE\n", + "\n", + "# Calculate the proportion of each unique value in the non-null data\n", + "value_counts = data['WEALTH1'].value_counts(normalize=True)\n", + "print(\"Proportion of each unique value in WEALTH1:\")\n", + "print(value_counts)\n", + "\n", + "# Create a list of values to fill the missing entries, based on these proportions\n", + "fill_values = np.random.choice(value_counts.index, size=data['WEALTH1'].isnull().sum(), p=value_counts.values)\n", + "\n", + "# Fill the missing values with these proportional values\n", + "data.loc[data['WEALTH1'].isnull(), 'WEALTH1'] = fill_values\n", + "\n", + "# Check the result\n", + "print(\"\\nFilled missing values in WEALTH1:\")\n", + "print(data['WEALTH1'].value_counts(normalize=True))\n", + "print(\"Null values: \", (data['WEALTH1'].isnull().sum()))" + ] + }, + { + "cell_type": "markdown", + "id": "64c14952", + "metadata": {}, + "source": [ + "#### ADI" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "id": 
"e8d8fbf3", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ADI\n", + "13.0 7296\n", + "51.0 4622\n", + "65.0 3765\n", + "57.0 2836\n", + "105.0 2617\n", + " ... \n", + "651.0 1\n", + "103.0 1\n", + "601.0 1\n", + "161.0 1\n", + "147.0 1\n", + "Name: count, Length: 204, dtype: int64\n", + "float64\n", + "0\n" + ] + } + ], + "source": [ + "print (data['ADI'].value_counts())\n", + "print (data['ADI'].dtype)\n", + "print (data['ADI'].isna().sum())\n", + "# some values have very low counts" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "id": "81aad058", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAgMUlEQVR4nO3df0xV9/3H8dctPy4/BncC7b296dWwjCztoJ3FlknXySZg2KyrLsPN1nSZa+xs2e6U0DL/GGsWWFmKdjE1sTHVahn9Y2NrMmnFbGV1pCll9Tu1S9elpsLkltSQe8FdLhTP9w/jyS6o9Qr1fLg8H8lJvOd8wPcJ0fvMuZdzXZZlWQIAADDIDU4PAAAAMB2BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4qU4PcC3Onz+vM2fOKCcnRy6Xy+lxAADAVbAsS6Ojo/L7/brhhitfI5mXgXLmzBkFAgGnxwAAANdgYGBAt9xyyxXXzMtAycnJkXThBHNzcx2eBgAAXI1IJKJAIGA/j1/JvAyUiy/r5ObmEigAAMwzV/P2DN4kCwAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQLAKL29vVq/fr16e3udHgWAgwgUAMYYHx9XW1ubPvzwQ7W1tWl8fNzpkQA4hEABYIwXX3xRZ8+elSSdPXtW7e3tDk8EwCkECgAjDA4Oqr29XZZlSbrwsezt7e0aHBx0eDIATiBQADjOsiw988wzl91/MVoALBwECgDHnT59Wn19fZqamorbPzU1pb6+Pp0+fdqhyQA4hUAB4LjFixfrrrvuuuSxu+++W4sXL77OEwFwGoECwHEul0t9fX2XPPbmm2/K5XJd54kAOI1AAeC4ioqKWR0HkHwIFAAAYBwCBQAAGIdAAeC41157bVbHASQfAgWAES4XIcQJsDARKAAAwDgECgBj3H333Vd8DGDhIFAAGOPNN9+84mMACweBAsAIl7vXCfdAARYmAgWA4z7pE4v5RGNg4SFQADjuwQcfnNVxAMmHQAEAAMYhUAA4Ljs7e1bHASQfAgWA486dOzer4wCSD4ECwHHf+MY3ZnUcQPIhUAA4rqGhYVbHASQfAgWA41588cVZHQeQfAgUAI577rnnZnUcQPIhUAA47uGHH57VcQDJh0AB4LgHHnhgVscBJJ+EA+U///mPHnzwQeXn5ysrK0tf+tKX1N/fbx+3LEtNTU3y+/3KzMxURUWFTp48Gfc9YrGY6urq
VFBQoOzsbK1Zs4ZbWQMAAFtCgTIyMqJ77rlHaWlp6urq0jvvvKOnn35an/3sZ+01ra2tamtr065du9TX1yefz6eqqiqNjo7aa4LBoDo7O9XR0aGjR49qbGxMq1ev1tTU1JydGID54+23357VcQDJx2VZlnW1i5944gn97W9/0+uvv37J45Zlye/3KxgM6vHHH5d04WqJ1+vVU089pc2bNyscDuvGG2/UgQMHtH79eknSmTNnFAgEdOjQIa1ateoT54hEIvJ4PAqHw8rNzb3a8QEY6mo+sfi111771OcA8OlK5Pk7oSsoL7/8spYtW6bvfOc7uummm7R06dK4d9efOnVKoVBI1dXV9j63260VK1aot7dXktTf36/Jycm4NX6/X8XFxfaa6WKxmCKRSNwGIHlwHxQA0yUUKO+//752796toqIivfrqq3rkkUf04x//WC+88IIkKRQKSZK8Xm/c13m9XvtYKBRSenq6Fi1adNk107W0tMjj8dhbIBBIZGwAhqupqZnVcQDJJ6FAOX/+vO688041Nzdr6dKl2rx5sx5++GHt3r07bp3L5Yp7bFnWjH3TXWlNY2OjwuGwvQ0MDCQyNgDDuVwuHTx48JLHXnzxxU/8/wNA8klNZPHNN9+s2267LW7frbfeqt/97neSJJ/PJ+nCVZKbb77ZXjM8PGxfVfH5fJqYmNDIyEjcVZTh4WGVl5df8u91u91yu92JjApcNcuyND4+7vQYC15+fr5SUlLi3iyfkpKivLw8RaNRByeDJGVkZBCKuK4SCpR77rlH7777bty+f/3rX1qyZIkkqbCwUD6fT93d3Vq6dKkkaWJiQj09PXrqqackSaWlpUpLS1N3d7dqa2slSUNDQzpx4oRaW1tnfUJAosbHx3kJwVBTU1P8bAzR1dWlzMxMp8fAApJQoPz0pz9VeXm5mpubVVtbqzfffFN79uzRnj17JF24TBsMBtXc3KyioiIVFRWpublZWVlZ2rBhgyTJ4/Fo06ZN2rZtm/Lz85WXl6f6+nqVlJSosrJy7s8QAADMOwkFyl133aXOzk41NjbqySefVGFhoXbu3Bl3l8eGhgZFo1Ft2bJFIyMjKisr0+HDh5WTk2Ov2bFjh1JTU1VbW6toNKqVK1dq3759SklJmbszA65SRkaGurq6nB4DunA1a+3atZKkzs5OZWRkODwRLuJngestofugmIL7oADJKRqN2i/p8JICkHw+tfugAAAAXA8ECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4yQUKE1NTXK5XHGbz+ezj1uWpaamJvn9fmVmZqqiokInT56M+x6xWEx1dXUqKChQdna21qxZo8HBwbk5GwAAkBQSvoLyxS9+UUNDQ/Z2/Phx+1hra6va2tq0a9cu9fX1yefzqaqqSqOjo/aaYDCozs5OdXR06OjRoxobG9Pq1as1NTU1N2cEAADmvdSEvyA1Ne6qyUWWZWnnzp3avn271q1bJ0nav3+/vF6v2tvbtXnzZoXDYe3du1cHDhxQZWWlJOngwYMKBAI6cuSIVq1aNcvTAQAAySDhKyjvvfee/H6/CgsL9d3vflfvv/++JOnUqVMKhUKqrq6217rdbq1YsUK9vb2SpP7+fk1OTsat8fv9Ki4uttdcSiwWUyQSidsAAEDySihQysrK9MILL+jVV1/Vc889p1AopPLycp09e1ahUEiS5PV6477G6/Xax0KhkNLT07Vo0aLLrrmU
lpYWeTweewsEAomMDQAA5pmEAqWmpkbf/va3VVJSosrKSv3pT3+SdOGlnItcLlfc11iWNWPfdJ+0prGxUeFw2N4GBgYSGRsAAMwzs/o14+zsbJWUlOi9996z35cy/UrI8PCwfVXF5/NpYmJCIyMjl11zKW63W7m5uXEbAABIXrMKlFgspn/+85+6+eabVVhYKJ/Pp+7ubvv4xMSEenp6VF5eLkkqLS1VWlpa3JqhoSGdOHHCXgMAAJDQb/HU19frvvvu0+LFizU8PKxf/vKXikQieuihh+RyuRQMBtXc3KyioiIVFRWpublZWVlZ2rBhgyTJ4/Fo06ZN2rZtm/Lz85WXl6f6+nr7JSMAAAApwUAZHBzU9773PX300Ue68cYb9eUvf1lvvPGGlixZIklqaGhQNBrVli1bNDIyorKyMh0+fFg5OTn299ixY4dSU1NVW1uraDSqlStXat++fUpJSZnbMwMAAPOWy7Isy+khEhWJROTxeBQOh3k/CpBEotGoampqJEldXV3KzMx0eCIAcymR528+iwcAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGGdWgdLS0iKXy6VgMGjvsyxLTU1N8vv9yszMVEVFhU6ePBn3dbFYTHV1dSooKFB2drbWrFmjwcHB2YwCAACSyDUHSl9fn/bs2aPbb789bn9ra6va2tq0a9cu9fX1yefzqaqqSqOjo/aaYDCozs5OdXR06OjRoxobG9Pq1as1NTV17WcCAACSxjUFytjYmB544AE999xzWrRokb3fsizt3LlT27dv17p161RcXKz9+/frv//9r9rb2yVJ4XBYe/fu1dNPP63KykotXbpUBw8e1PHjx3XkyJG5OSsAADCvXVOgPProo/rmN7+pysrKuP2nTp1SKBRSdXW1vc/tdmvFihXq7e2VJPX392tycjJujd/vV3Fxsb1mulgspkgkErcBAIDklZroF3R0dOjvf/+7+vr6ZhwLhUKSJK/XG7ff6/Xqgw8+sNekp6fHXXm5uObi10/X0tKiX/ziF4mOCgAA5qmErqAMDAzoJz/5iQ4ePKiMjIzLrnO5XHGPLcuasW+6K61pbGxUOBy2t4GBgUTGBgAA80xCgdLf36/h4WGVlpYqNTVVqamp6unp0W9+8xulpqbaV06mXwkZHh62j/l8Pk1MTGhkZOSya6Zzu93Kzc2N2wAAQPJKKFBWrlyp48eP69ixY/a2bNkyPfDAAzp27Jg+97nPyefzqbu72/6aiYkJ9fT0qLy8XJJUWlqqtLS0uDVDQ0M6ceKEvQYAACxsCb0HJScnR8XFxXH7srOzlZ+fb+8PBoNqbm5WUVGRioqK1NzcrKysLG3YsEGS5PF4tGnTJm3btk35+fnKy8tTfX29SkpKZrzpFgAALEwJv0n2kzQ0NCgajWrLli0aGRlRWVmZDh8+rJycHHvNjh07lJqaqtraWkWjUa1cuVL79u1TSkrKXI8DAADmIZdlWZbTQyQqEonI4/EoHA7zfhQgiUSjUdXU1EiSurq6lJmZ6fBEAOZSIs/ffBYPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAo
AADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4CQXK7t27dfvttys3N1e5ublavny5urq67OOWZampqUl+v1+ZmZmqqKjQyZMn475HLBZTXV2dCgoKlJ2drTVr1mhwcHBuzgYAACSFhALllltu0a9+9Su99dZbeuutt/T1r39d3/rWt+wIaW1tVVtbm3bt2qW+vj75fD5VVVVpdHTU/h7BYFCdnZ3q6OjQ0aNHNTY2ptWrV2tqampuzwwAAMxbLsuyrNl8g7y8PP3617/WD37wA/n9fgWDQT3++OOSLlwt8Xq9euqpp7R582aFw2HdeOONOnDggNavXy9JOnPmjAKBgA4dOqRVq1Zd1d8ZiUTk8XgUDoeVm5s7m/EBGCQajaqmpkaS1NXVpczMTIcnAjCXEnn+vub3oExNTamjo0Pnzp3T8uXLderUKYVCIVVXV9tr3G63VqxYod7eXklSf3+/Jicn49b4/X4VFxfbawAAAFIT/YLjx49r+fLlGh8f12c+8xl1dnbqtttuswPD6/XGrfd6vfrggw8kSaFQSOnp6Vq0aNGMNaFQ6LJ/ZywWUywWsx9HIpFExwYAAPNIwldQvvCFL+jYsWN644039KMf/UgPPfSQ3nnnHfu4y+WKW29Z1ox9033SmpaWFnk8HnsLBAKJjg0AAOaRhAMlPT1dn//857Vs2TK1tLTojjvu0DPPPCOfzydJM66EDA8P21dVfD6fJiYmNDIyctk1l9LY2KhwOGxvAwMDiY4NAADmkVnfB8WyLMViMRUWFsrn86m7u9s+NjExoZ6eHpWXl0uSSktLlZaWFrdmaGhIJ06csNdcitvttn+1+eIGAACSV0LvQfnZz36mmpoaBQIBjY6OqqOjQ6+99ppeeeUVuVwuBYNBNTc3q6ioSEVFRWpublZWVpY2bNggSfJ4PNq0aZO2bdum/Px85eXlqb6+XiUlJaqsrPxUThAAAMw/CQXKhx9+qI0bN2poaEgej0e33367XnnlFVVVVUmSGhoaFI1GtWXLFo2MjKisrEyHDx9WTk6O/T127Nih1NRU1dbWKhqNauXKldq3b59SUlLm9swAAMC8Nev7oDiB+6AAyYn7oADJ7brcBwUAAODTQqAAAADjJHyjNswNy7I0Pj7u9BiAUf733wT/PoBLy8jI+MT7iyUDAsUh4+Pj9mvtAGZau3at0yMARloo78/iJR4AAGAcrqAYYOxL35N1Az8KQJYlnf/4wp9vSJUWwGVs4Gq4zn+szxz7rdNjXFc8KxrAuiFVSklzegzAEOlODwAYZ97dD2QO8BIPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjJNQoLS0tOiuu+5STk6ObrrpJt1///16991349ZYlqWmpib5/X5lZmaqoqJCJ0+ejFsTi8VUV1engoICZWdna82aNRocHJz92QAAgKSQUKD09PTo0Ucf1RtvvKHu7m59/PHHqq6u1rlz5+w1ra2tamtr065du9TX1yefz6eqqiqNjo7aa4LBoDo7O9XR0aGjR49qbGxMq1ev1tTU1NydGQAAmLdSE1n8yiuvxD1+/vnnddNNN6m/v19f/epXZVmWdu7cqe3bt2vdunWSpP3798vr9aq9vV2bN29WOBzW3r17deDAAVVWVkqSDh48qEAgoCNHjmjVqlVzdGoAAGC+mtV7UMLh
sCQpLy9PknTq1CmFQiFVV1fba9xut1asWKHe3l5JUn9/vyYnJ+PW+P1+FRcX22umi8ViikQicRsAAEhe1xwolmVp69at+spXvqLi4mJJUigUkiR5vd64tV6v1z4WCoWUnp6uRYsWXXbNdC0tLfJ4PPYWCASudWwAADAPXHOgPPbYY/rHP/6h3/72tzOOuVyuuMeWZc3YN92V1jQ2NiocDtvbwMDAtY4NAADmgWsKlLq6Or388sv6y1/+oltuucXe7/P5JGnGlZDh4WH7qorP59PExIRGRkYuu2Y6t9ut3NzcuA0AACSvhALFsiw99thj+v3vf68///nPKiwsjDteWFgon8+n7u5ue9/ExIR6enpUXl4uSSotLVVaWlrcmqGhIZ04ccJeAwAAFraEfovn0UcfVXt7u/74xz8qJyfHvlLi8XiUmZkpl8ulYDCo5uZmFRUVqaioSM3NzcrKytKGDRvstZs2bdK2bduUn5+vvLw81dfXq6SkxP6tHgAAsLAlFCi7d++WJFVUVMTtf/755/X9739fktTQ0KBoNKotW7ZoZGREZWVlOnz4sHJycuz1O3bsUGpqqmpraxWNRrVy5Urt27dPKSkpszsbAACQFFyWZVlOD5GoSCQij8ejcDg8b9+PEo1GVVNTI0kavXOjlJLm8EQAAGNNTSrn7wckSV1dXcrMzHR4oGuTyPM3n8UDAACMQ6AAAADjECgAAMA4BAoAADBOQr/Fg7kT997kqUnnBgEAmO9/nifm4e+2XBMCxSGxWMz+c87/dTg4CQBgPonFYsrKynJ6jE8dL/EAAADjcAXFIW632/7z6B3f5T4oAIDLm5q0r7b/7/NHMiNQHBL3yc0paQQKAOCqxD1/JDFe4gEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYJ+FA+etf/6r77rtPfr9fLpdLf/jDH+KOW5alpqYm+f1+ZWZmqqKiQidPnoxbE4vFVFdXp4KCAmVnZ2vNmjUaHByc1YkAAIDkkXCgnDt3TnfccYd27dp1yeOtra1qa2vTrl271NfXJ5/Pp6qqKo2OjtprgsGgOjs71dHRoaNHj2psbEyrV6/W1NTUtZ8JAABIGqmJfkFNTY1qamouecyyLO3cuVPbt2/XunXrJEn79++X1+tVe3u7Nm/erHA4rL179+rAgQOqrKyUJB08eFCBQEBHjhzRqlWrZnE6AAAgGczpe1BOnTqlUCik6upqe5/b7daKFSvU29srServ79fk5GTcGr/fr+LiYnvNdLFYTJFIJG4DAADJa04DJRQKSZK8Xm/cfq/Xax8LhUJKT0/XokWLLrtmupaWFnk8HnsLBAJzOTYAADDMp/JbPC6XK+6xZVkz9k13pTWNjY0Kh8P2NjAwMGezAgAA88xpoPh8PkmacSVkeHjYvqri8/k0MTGhkZGRy66Zzu12Kzc3N24DAADJa04DpbCwUD6fT93d3fa+iYkJ9fT0qLy8XJJUWlqqtLS0uDVDQ0M6ceKEvQYAACxsCf8Wz9jYmP7973/bj0+dOqVjx44pLy9PixcvVjAYVHNzs4qKilRUVKTm5mZlZWVpw4YNkiSPx6NNmzZp27Ztys/PV15enurr61VSUmL/Vg8AAFjYEg6Ut956S1/72tfsx1u3bpUkPfTQQ9q3b58aGhoUjUa1ZcsWjYyMqKysTIcPH1ZOTo79NTt27FBqaqpqa2sVjUa1cuVK7du3TykpKXNwSgAAYL5zWZZlOT1EoiKRiDwej8Lh8Lx9P0o0GrXvJzN650YpJc3hiQAAxpqaVM7f
D0iSurq6lJmZ6fBA1yaR528+iwcAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYJ+Fb3WPuuc5/rHl3O1/g02BZ0vmPL/z5hlTJ5XJ2HsAQrov/LhYQAsUAnzn2W6dHAADAKLzEAwAAjMMVFIdkZGSoq6vL6TEAo4yPj2vt2rWSpM7OTmVkZDg8EWCehfLvgkBxiMvlmrefRglcDxkZGfwbARYwXuIBAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxHA2UZ599VoWFhcrIyFBpaalef/11J8cBAACGcCxQXnrpJQWDQW3fvl1vv/227r33XtXU1Oj06dNOjQQAAAyR6tRf3NbWpk2bNumHP/yhJGnnzp169dVXtXv3brW0tDg1FhYgy7I0Pj7u9BiQ4n4O/EzMkpGRIZfL5fQYWEAcCZSJiQn19/friSeeiNtfXV2t3t7eGetjsZhisZj9OBKJfOozYuEYHx9XTU2N02NgmrVr1zo9Av5HV1eXMjMznR4DC4gjL/F89NFHmpqaktfrjdvv9XoVCoVmrG9paZHH47G3QCBwvUYFAAAOcOwlHkkzLhdalnXJS4iNjY3aunWr/TgSiRApmDMZGRnq6upyegzowv8BF6+Wut1uXlIwSEZGhtMjYIFxJFAKCgqUkpIy42rJ8PDwjKsq0oX/qNxu9/UaDwuMy+Xi0rVBsrKynB4BgAEceYknPT1dpaWl6u7ujtvf3d2t8vJyJ0YCAAAGcewlnq1bt2rjxo1atmyZli9frj179uj06dN65JFHnBoJAAAYwrFAWb9+vc6ePasnn3xSQ0NDKi4u1qFDh7RkyRKnRgIAAIZwWZZlOT1EoiKRiDwej8LhsHJzc50eBwAAXIVEnr/5LB4AAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHMdudT8bF29+G4lEHJ4EAABcrYvP21dzE/t5GSijo6OSpEAg4PAkAAAgUaOjo/J4PFdcMy8/i+f8+fM6c+aMcnJy5HK5nB4HwByKRCIKBAIaGBjgs7aAJGNZlkZHR+X3+3XDDVd+l8m8DBQAyYsPAwUg8SZZAABgIAIFAAAYh0ABYBS3262f//zncrvdTo8CwEG8BwUAABiHKygAAMA4BAoAADAOgQIAAIxDoAAAAOMQKACM8uyzz6qwsFAZGRkqLS3V66+/7vRIABxAoAAwxksvvaRgMKjt27fr7bff1r333quamhqdPn3a6dEAXGf8mjEAY5SVlenOO+/U7t277X233nqr7r//frW0tDg4GYDrjSsoAIwwMTGh/v5+VVdXx+2vrq5Wb2+vQ1MBcAqBAsAIH330kaampuT1euP2e71ehUIhh6YC4BQCBYBRXC5X3GPLsmbsA5D8CBQARigoKFBKSsqMqyXDw8MzrqoASH4ECgAjpKenq7S0VN3d3XH7u7u7VV5e7tBUAJyS6vQAAHDR1q1btXHjRi1btkzLly/Xnj17dPr0aT3yyCNOjwbgOiNQABhj/fr1Onv2rJ588kkNDQ2puLhYhw4d0pIlS5weDcB1xn1QAACAcXgPCgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDj/D9TABctf8r/kAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(data['ADI'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "id": "f0c45f13", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 95280.000000\n", + "mean 187.356402\n", + "std 137.019184\n", + "min 0.000000\n", + "25% 65.000000\n", + "50% 175.000000\n", + "75% 279.000000\n", + "max 651.000000\n", + "Name: ADI, dtype: float64" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['ADI'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "id": "3759fa9f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "min: 0.0, Q1: 65.0, Q2: 175.0, Q3: 279.0, Upper Bound: 600.0, max: 651.0\n" + ] + } + ], + "source": [ + "# GROUP VALUES AND ASSIGN BINS FOR THE DISTRIBUTION\n", + "\n", + "min_val = data['ADI'].min()\n", + "Q1 = data['ADI'].quantile(0.25)\n", + "Q2 = data['ADI'].median()\n", + "Q3 = data['ADI'].quantile(0.75)\n", + "IQR = Q3 - Q1\n", + "upper_bound = Q3 + 1.5 * IQR\n", + "max_val = data['ADI'].max()\n", + "\n", + "print(f\"min: {min_val}, Q1: {Q1}, Q2: {Q2}, Q3: {Q3}, Upper Bound: {upper_bound}, max: {max_val}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "id": "b0fb25c5", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Counts in each group:\n", + "ADI_bin\n", + "Lower Bound 24122\n", + "Q1 23617\n", + "Q3 24093\n", + "Upper Bound 22840\n", + "Outliers 608\n", + "dtype: int64\n", + "\n" + ] + } + ], + "source": [ + "# Define bins and labels\n", + "bins = [min_val, Q1, Q2, Q3, upper_bound, max_val] \n", + "labels = ['Lower Bound', 'Q1', 'Q3', 'Upper Bound', 'Outliers'] \n", + "\n", + "# Create a new column with the binned ranges\n", + "data['ADI_bin'] = 
pd.cut(data['ADI'], bins=bins, labels=labels, include_lowest=True)\n", + "\n", + "# Group by the new 'ADI_bin' column\n", + "grouped = data.groupby('ADI_bin')\n", + "\n", + "# Get counts for each group\n", + "print(\"Counts in each group:\")\n", + "print(grouped.size())\n", + "print()" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "id": "75dd33cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3\n", + "1 0\n", + "2 4\n", + "3 1\n", + "4 1\n", + " ..\n", + "95407 0\n", + "95408 3\n", + "95409 0\n", + "95410 0\n", + "95411 4\n", + "Name: ADI, Length: 95280, dtype: category\n", + "Categories (5, int64): [0 < 1 < 3 < 4 < 5]" + ] + }, + "execution_count": 271, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Ordinal codes for the ADI bins: contiguous 0-4, lowest bin first\n", + "coverage_mapping = {'Lower Bound': 0, 'Q1': 1, 'Q3': 2, 'Upper Bound': 3, 'Outliers': 4}\n", + "\n", + "# Apply ordinal encoding with the custom mappings\n", + "data['ADI'] = data['ADI_bin'].map(coverage_mapping)\n", + "\n", + "# Drop the 'ADI_bin' column if no longer needed\n", + "data.drop('ADI_bin', axis=1, inplace=True)\n", + "data['ADI']" + ] + }, + { + "cell_type": "markdown", + "id": "81511999", + "metadata": {}, + "source": [ + "#### DMA" + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "id": "6808e0d2", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DMA\n", + "803.0 7296\n", + "602.0 4632\n", + "807.0 3765\n", + "505.0 2839\n", + "819.0 2588\n", + " ...
\n", + "569.0 1\n", + "554.0 1\n", + "584.0 1\n", + "552.0 1\n", + "516.0 1\n", + "Name: count, Length: 206, dtype: int64\n", + "float64\n", + "0\n" + ] + } + ], + "source": [ + "print (data['DMA'].value_counts())\n", + "print (data['DMA'].dtype)\n", + "print (data['DMA'].isna().sum())\n", + "# some values have very low counts" + ] + }, + { + "cell_type": "code", + "execution_count": 273, + "id": "e827bba6", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAZ+UlEQVR4nO3dX2zV933/8Zdjg22Y7QVY7Fr1Uiq5WlazXxNTodJ2QQIcoWZZEmmkTZZ1Wi6ISNm8JEuH2J8kUm2FqQStqFRUUZMFEXIztF4ENY62oiI2jdGka9jUXiwKZMHyWlm2WY1N4PwuopzV0KQxEM7H8HhIR7K/53PC+yvLOU99vsfn1FUqlUoAAApyTa0HAAA4l0ABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOA21HuBCnD17Nm+++WZaWlpSV1dX63EAgPehUqlkYmIinZ2dueaa994jmZOB8uabb6arq6vWYwAAF+D48eP58Ic//J5r5mSgtLS0JHn7BFtbW2s8DQDwfoyPj6erq6v6PP5e5mSgvHNZp7W1VaAAwBzzfl6e4UWyAEBxBAoAUByBAgAUR6AAAMURKABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQHIECABRnTn4WD1xKlUolp06dqvUY5O2fxdTUVJKksbHxfX1eB5dHU1OTnweXlUDhqnfq1KmsW7eu1mNA0fbv35/m5uZaj8FVxCUeAKA4dlC46jU1NWX//v21HoO8vZt1xx13JEn27duXpqamGk/EO/wsuNwECle9uro6W9cFampq8nOBq5hLPABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQHIECABRHoAAAxREoAEBxBAoAUByBAgAUR6AAAMURKABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQHIECABRHoAAAxREoAEBxBAoAUByBAgAUR6AAAMWZVaC89dZb+Yu/+IssXbo0zc3N+ehHP5rHH388Z8+era6pVCp59NFH09nZmebm5qxatSpHjx6d8d+ZmprKpk2bsmTJkixcuDC33XZb3njjjUtzRgDAnDerQHniiSfyjW98Izt27Mh//ud/ZuvWrfmbv/mbfO1rX6uu2bp1a7Zt25YdO3bk8OHD6ejoyNq1azMxMVFd09/fn3379mXv3r05ePBgTp48mVtvvTVnzpy5dGcGAMxZDbNZ/M///M/53d/93Xzuc59LknzkIx/Jc889l3/7t39L8vbuyfbt27Nly5bceeedSZJnnnkm7e3t2bNnTzZs2JCxsbE89dRTefbZZ7NmzZokye7du9PV1ZWXXnopt9xyy6U8v2JVKpWcOnWq1mNAUX7+d8LvB/xiTU1Nqaurq/UYH7hZBcpnPvOZfOMb38iPf/zjfOxjH8sPfvCDHDx4MNu3b0+SvPbaaxkeHk5fX1/1MY2Njbn55ptz6NChbNiwIUeOHMnp06dnrOns7ExPT08OHTr0CwNlamoqU1NT1e/Hx8dne57FOXXqVNatW1f
rMaBYd9xxR61HgCLt378/zc3NtR7jAzerQPnyl7+csbGx/MZv/Ebq6+tz5syZfOUrX8kXvvCFJMnw8HCSpL29fcbj2tvb8/rrr1fXzJ8/P9dee+15a955/LkGBwfz2GOPzWZUAGAOm1WgPP/889m9e3f27NmTj3/843nllVfS39+fzs7OfPGLX6yuO3frqVKp/NLtqPdas3nz5jz44IPV78fHx9PV1TWb0Yt28hNfSOWaWf0o4MpUqSRn33r762sakqtgGxvej7qzb+VXXnmu1mNcVrN6VvyzP/uz/Pmf/3k+//nPJ0mWLVuW119/PYODg/niF7+Yjo6OJG/vknzoQx+qPm5kZKS6q9LR0ZHp6emMjo7O2EUZGRnJypUrf+G/29jYmMbGxtmd2RxSuaYhqZ9X6zGgEPNrPQAUp1LrAWpgVn/F87Of/SzXXDPzIfX19dU/M166dGk6OjoyNDRUvX96ejoHDhyoxkdvb2/mzZs3Y82JEyfy6quvvmugAABXl1ntoPzO7/xOvvKVr+TXf/3X8/GPfzwvv/xytm3blj/6oz9K8valnf7+/gwMDKS7uzvd3d0ZGBjIggULcvfddydJ2tract999+Whhx7K4sWLs2jRojz88MNZtmxZ9a96AICr26wC5Wtf+1r+8i//Mhs3bszIyEg6OzuzYcOG/NVf/VV1zSOPPJLJycls3Lgxo6OjWbFiRV588cW0tLRU1zz55JNpaGjI+vXrMzk5mdWrV+fpp59OfX39pTszAGDOqqtUKnPu0tb4+Hja2toyNjaW1tbWWo9zQSYnJ6t/Zjxx071egwLAuztzOi3ffzbJ3P4z49k8f/ssHgCgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAoTkOtB7haVSqV//vmzOnaDQJA+X7ueWLG88cVTKDUyNTUVPXrlh/sreEkAMwlU1NTWbBgQa3H+MC5xAMAFMcOSo00NjZWv574f59P6ufVcBoAinbmdHW3/eefP65kAqVG6urq/u+b+nkCBYD3ZcbzxxXMJR4AoDgCBQAojkABAIojUACA4ggUAKA4AgUAKM6sA+W///u/8/u///tZvHhxFixYkE984hM5cuRI9f5KpZJHH300nZ2daW5uzqpVq3L06NEZ/42pqals2rQpS5YsycKFC3PbbbfljTfeuPizAQCuCLMKlNHR0Xz605/OvHnzsn///vzHf/xHvvrVr+ZXf/VXq2u2bt2abdu2ZceOHTl8+HA6Ojqydu3aTExMVNf09/dn37592bt3bw4ePJiTJ0/m1ltvzZkzZy7ZiQEAc9es3qjtiSeeSFdXV771rW9Vj33kIx+pfl2pVLJ9+/Zs2bIld955Z5LkmWeeSXt7e/bs2ZMNGzZkbGwsTz31VJ599tmsWbMmSbJ79+50dXXlpZdeyi233HIJTgsAmMtmtYPy7W9/O8uXL8/v/d7v5brrrsuNN96Yb37zm9X7X3vttQwPD6evr696rLGxMTfffHMOHTqUJDly5EhOnz49Y01nZ2d6enqqawCAq9usAuW//uu/snPnznR3d+c73/lO7r///vzxH/9x/u7v/i5JMjw8nCRpb2+f8bj29vbqfcPDw5k/f36uvfbad11zrqmpqYyPj8+4AQBXrlld4jl79myWL1+egYGBJMmNN96Yo0ePZufOnfmDP/iD6rpzPyegUqn80s8
OeK81g4ODeeyxx2YzKgAwh81qB+VDH/pQfvM3f3PGsRtuuCHHjh1LknR0dCTJeTshIyMj1V2Vjo6OTE9PZ3R09F3XnGvz5s0ZGxur3o4fPz6bsQGAOWZWgfLpT386P/rRj2Yc+/GPf5zrr78+SbJ06dJ0dHRkaGioev/09HQOHDiQlStXJkl6e3szb968GWtOnDiRV199tbrmXI2NjWltbZ1xAwCuXLO6xPOnf/qnWblyZQYGBrJ+/fr867/+a3bt2pVdu3YlefvSTn9/fwYGBtLd3Z3u7u4MDAxkwYIFufvuu5MkbW1tue+++/LQQw9l8eLFWbRoUR5++OEsW7as+lc9AMDVbVaB8slPfjL79u3L5s2b8/jjj2fp0qXZvn177rnnnuqaRx55JJOTk9m4cWNGR0ezYsWKvPjii2lpaamuefLJJ9PQ0JD169dncnIyq1evztNPP536+vpLd2YAwJxVV6lUKrUeYrbGx8fT1taWsbGxOXu5Z3JyMuvWrUuSTNx0b1I/r8YTAVCsM6fT8v1nkyT79+9Pc3NzjQe6MLN5/vZZPABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQHIECABRnVm/Uxgej7uxbmXNvRgMfhEolOfvW219f05D8kg8ZhatF3Tu/F1cRgVKAX3nluVqPAABFcYkHACiOHZQaaWpqyv79+2s9BhTl1KlTueOOO5Ik+/btS1NTU40ngvJcLb8XAqVG6urq5uxnKcDl0NTU5HcErmIu8QAAxREoAEBxBAoAUByBAgAUR6AAAMURKABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQHIECABRHoAAAxREoAEBxBAoAUByBAgAUR6AAAMURKABAcRpqPQDUWqVSyalTp2o9BsmMn4OfSVmamppSV1dX6zG4iggUrnqnTp3KunXraj0G57jjjjtqPQI/Z//+/Wlubq71GFxFXOIBAIpjB4WrXlNTU/bv31/rMcjbl9umpqaSJI2NjS4pFKSpqanWI3CVEShc9erq6mxdF2TBggW1HgEogEs8AEBxBAoAUByBAgAUR6AAAMURKABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQHIECABRHoAAAxREoAEBxBAoAUByBAgAUR6AAAMURKABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQHIECABRHoAAAxREoAEBxBAoAUByBAgAUR6AAAMURKABAcQQKAFAcgQIAFOeiAmVwcDB1dXXp7++vHqtUKnn00UfT2dmZ5ubmrFq1KkePHp3xuKmpqWzatClLlizJwoULc9ttt+WNN964mFEAgCvIBQfK4cOHs2vXrvzWb/3WjONbt27Ntm3bsmPHjhw+fDgdHR1Zu3ZtJiYmqmv6+/uzb9++7N27NwcPHszJkydz66235syZMxd+JgDAFeOCAuXkyZO555578s1vfjPXXntt9XilUsn27duzZcuW3Hnnnenp6ckzzzyTn/3sZ9mzZ0+SZGxsLE899VS++tWvZs2aNbnxxhuze/fu/PCHP8xLL710ac4KAJjTLihQHnjggXzuc5/LmjVrZhx/7bXXMjw8nL6+vuqxxsbG3HzzzTl06FCS5MiRIzl9+vSMNZ2dnenp6amuOdfU1FTGx8dn3ACAK1fDbB+wd+/efP/738/hw4fPu294eDhJ0t7ePuN4e3t7Xn/99eqa+fPnz9h5eWfNO48/1+DgYB577LHZjgoAzFGz2kE5fvx4/uRP/iS7d+9OU1PTu66rq6ub8X2lUjnv2Lnea83mzZszNjZWvR0/fnw2YwMAc8ysAuXIkSMZGRlJb29vGhoa0tDQkAMHDuRv//Zv09DQUN05OXcnZGRkpHpfR0dHpqenMzo6+q5rztXY2JjW1tYZNwDgyjWrQFm9enV++MMf5pVXXqneli9fnnvuuSevvPJKPvrRj6ajoyNDQ0PVx0xPT+fAgQNZuXJlkqS3tzfz5s2bsebEiRN59dVXq2s
AgKvbrF6D0tLSkp6enhnHFi5cmMWLF1eP9/f3Z2BgIN3d3enu7s7AwEAWLFiQu+++O0nS1taW++67Lw899FAWL16cRYsW5eGHH86yZcvOe9EtAHB1mvWLZH+ZRx55JJOTk9m4cWNGR0ezYsWKvPjii2lpaamuefLJJ9PQ0JD169dncnIyq1evztNPP536+vpLPQ4AMAfVVSqVSq2HmK3x8fG0tbVlbGzM61EAYI6YzfO3z+IBAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKM6tAGRwczCc/+cm0tLTkuuuuy+23354f/ehHM9ZUKpU8+uij6ezsTHNzc1atWpWjR4/OWDM1NZVNmzZlyZIlWbhwYW677ba88cYbF382AMAVYVaBcuDAgTzwwAP5l3/5lwwNDeWtt95KX19f/vd//7e6ZuvWrdm2bVt27NiRw4cPp6OjI2vXrs3ExER1TX9/f/bt25e9e/fm4MGDOXnyZG699dacOXPm0p0ZADBn1VUqlcqFPvh//ud/ct111+XAgQP57d/+7VQqlXR2dqa/vz9f/vKXk7y9W9Le3p4nnngiGzZsyNjYWH7t134tzz77bO66664kyZtvvpmurq688MILueWWW37pvzs+Pp62traMjY2ltbX1QscHAC6j2Tx/X9RrUMbGxpIkixYtSpK89tprGR4eTl9fX3VNY2Njbr755hw6dChJcuTIkZw+fXrGms7OzvT09FTXnGtqairj4+MzbgDAleuCA6VSqeTBBx/MZz7zmfT09CRJhoeHkyTt7e0z1ra3t1fvGx4ezvz583Pttde+65pzDQ4Opq2trXrr6uq60LEBgDngggPlS1/6Uv793/89zz333Hn31dXVzfi+Uqmcd+xc77Vm8+bNGRsbq96OHz9+oWMDAHPABQXKpk2b8u1vfzv/9E//lA9/+MPV4x0dHUly3k7IyMhIdVelo6Mj09PTGR0dfdc152psbExra+uMGwBw5ZpVoFQqlXzpS1/K3//93+cf//Efs3Tp0hn3L126NB0dHRkaGqoem56ezoEDB7Jy5cokSW9vb+bNmzdjzYkTJ/Lqq69W1wAAV7eG2Sx+4IEHsmfPnvzDP/xDWlpaqjslbW1taW5uTl1dXfr7+zMwMJDu7u50d3dnYGAgCxYsyN13311de9999+Whhx7K4sWLs2jRojz88MNZtmxZ1qxZc+nPEACYc2YVKDt37kySrFq1asbxb33rW/nDP/zDJMkjjzySycnJbNy4MaOjo1mxYkVefPHFtLS0VNc/+eSTaWhoyPr16zM5OZnVq1fn6aefTn19/cWdDQBwRbio90GpFe+DAgBzz2V7HxQAgA+CQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgU
AKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIojUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgNtR4A4OetWrWq+vV3v/vdms0B1FZNd1C+/vWvZ+nSpWlqakpvb2++973v1XIcoMZ+Pk5+0ffA1aNmgfL888+nv78/W7Zsycsvv5zPfvazWbduXY4dO1arkQCAQtRVKpVKLf7hFStW5KabbsrOnTurx2644YbcfvvtGRwcfM/Hjo+Pp62tLWNjY2ltbf2gRwUug/faLXGpB64Ms3n+rskOyvT0dI4cOZK+vr4Zx/v6+nLo0KHz1k9NTWV8fHzGDbhyrF279qLuB648NQmUn/zkJzlz5kza29tnHG9vb8/w8PB56wcHB9PW1la9dXV1Xa5Rgcvg9OnTF3U/cOWp6Ytk6+rqZnxfqVTOO5YkmzdvztjYWPV2/PjxyzUicBnMmzfvou4Hrjw1CZQlS5akvr7+vN2SkZGR83ZVkqSxsTGtra0zbsCVY2ho6KLuB648NQmU+fPnp7e397z/6QwNDWXlypW1GAmosXd7IawXyMLVqWZv1Pbggw/m3nvvzfLly/OpT30qu3btyrFjx3L//ffXaiQAoBA1C5S77rorP/3pT/P444/nxIkT6enpyQsvvJDrr7++ViMBNfbd737XO8kCSWr4PigXw/ugAMDcU/z7oAAAvBeBAgAUR6AAAMURKABAcQQKAFAcgQIAFEegAADFESgAQHEECgBQnJq91f3FeOfNb8fHx2s8CQDwfr3zvP1+3sR+TgbKxMREkqSrq6vGkwAAszUxMZG2trb3XDMnP4vn7NmzefPNN9PS0pK6urpajwNcQuPj4+nq6srx48d91hZcYSqVSiYmJtLZ2ZlrrnnvV5nMyUABrlw+DBRIvEgWACiQQAEAiiNQgKI0Njbmr//6r9PY2FjrUYAa8hoUAKA4dlAAgOIIFACgOAIFACiOQAEAiiNQgKJ8/etfz9KlS9PU1JTe3t5873vfq/VIQA0IFKAYzz//fPr7+7Nly5a8/PLL+exnP5t169bl2LFjtR4NuMz8mTFQjBUrVuSmm27Kzp07q8duuOGG3H777RkcHKzhZMDlZgcFKML09HSOHDmSvr6+Gcf7+vpy6NChGk0F1IpAAYrwk5/8JGfOnEl7e/uM4+3t7RkeHq7RVECtCBSgKHV1dTO+r1Qq5x0DrnwCBSjCkiVLUl9ff95uycjIyHm7KsCVT6AARZg/f356e3szNDQ04/jQ0FBWrlxZo6mAWmmo9QAA73jwwQdz7733Zvny5fnUpz6VXbt25dixY7n//vtrPRpwmQkUoBh33XVXfvrTn+bxxx/PiRMn0tPTkxdeeCHXX399rUcDLjPvgwIAFMdrUACA4ggUAKA4AgUAKI5AAQCKI1AAgOIIFACgOAIFACiOQAEAiiNQAIDiCBQAoDgCBQAojkABAIrz/wGL2HnZtuCMvgAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(data['DMA'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "id": "19e1abda", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 95280.000000\n", + "mean 664.004072\n", + "std 116.363600\n", + "min 0.000000\n", + "25% 561.000000\n", + "50% 635.000000\n", + "75% 801.000000\n", + "max 881.000000\n", + "Name: DMA, dtype: float64" + ] + }, + "execution_count": 274, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['DMA'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "id": "c3a37da5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "min: 0.0, Lower Bound: 201.0, Q1: 561.0, Q2: 635.0, Q3: 801.0, max: 881.0\n" + ] + } + ], + "source": [ + "# GROUP VALUES AND ASSIGN BINS FOR THE DISTRIBUTION\n", + "\n", + "# Calculate quantiles and IQR for the DMA column\n", + "min_val_dma = data['DMA'].min()\n", + "Q1_dma = data['DMA'].quantile(0.25)\n", + "Q2_dma = data['DMA'].median()\n", + "Q3_dma = data['DMA'].quantile(0.75)\n", + "IQR_dma = Q3_dma - Q1_dma\n", + "max_val_dma = data['DMA'].max()\n", + "lower_bound_dma = Q1_dma - 1.5 * IQR_dma\n", + "\n", + "print(f\"min: {min_val_dma}, Lower Bound: {lower_bound_dma}, Q1: {Q1_dma}, Q2: {Q2_dma}, Q3: {Q3_dma}, max: {max_val_dma}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 276, + "id": "93b2d43d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Counts in each group:\n", + "DMA_bin\n", + "Outliers 187\n", + "Lower Bound 23635\n", + "Q1 24069\n", + "Q3 23907\n", + "Upper Bound 23482\n", + "dtype: int64\n", + "\n" + ] + } + ], + "source": [ + "# Define bins and labels\n", + "bins_dma = [min_val_dma, lower_bound_dma, Q1_dma, Q2_dma, Q3_dma, max_val_dma] \n", + 
"labels_dma = ['Outliers','Lower Bound', 'Q1', 'Q3', 'Upper Bound'] \n", + "\n", + "# Create a new column with the binned ranges\n", + "data['DMA_bin'] = pd.cut(data['DMA'], bins=bins_dma, labels=labels_dma, include_lowest=True)\n", + "\n", + "# Group by the new 'DMA_bin' column\n", + "grouped_dma = data.groupby('DMA_bin')\n", + "\n", + "# Get counts for each group\n", + "print(\"Counts in each group:\")\n", + "print(grouped_dma.size())\n", + "print()" + ] + }, + { + "cell_type": "code", + "execution_count": 277, + "id": "a476fb5d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3\n", + "1 4\n", + "2 1\n", + "3 4\n", + "4 1\n", + " ..\n", + "95407 3\n", + "95408 2\n", + "95409 1\n", + "95410 4\n", + "95411 1\n", + "Name: DMA, Length: 95280, dtype: category\n", + "Categories (5, int64): [0 < 1 < 2 < 3 < 4]" + ] + }, + "execution_count": 277, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Define the ordinal encoding for each variable\n", + "dma_mapping = {'Outliers':0, 'Lower Bound': 1, 'Q1': 2, 'Q3': 3, 'Upper Bound': 4}\n", + "\n", + "# Apply ordinal encoding with the custom mappings\n", + "data['DMA'] = data['DMA_bin'].map(dma_mapping)\n", + "\n", + "# Drop the 'DMA_bin' column if no longer needed\n", + "data.drop('DMA_bin', axis=1, inplace=True)\n", + "data['DMA']" + ] + }, + { + "cell_type": "markdown", + "id": "ab6a8c40", + "metadata": {}, + "source": [ + "### MSA" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "id": "30d4e872", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSA\n", + "0.0 21333\n", + "4480.0 4606\n", + "1600.0 4059\n", + "2160.0 2586\n", + "520.0 1685\n", + " ... 
\n", + "9140.0 1\n", + "3200.0 1\n", + "9280.0 1\n", + "743.0 1\n", + "8480.0 1\n", + "Name: count, Length: 298, dtype: int64\n", + "float64\n", + "0\n" + ] + } + ], + "source": [ + "print (data['MSA'].value_counts())\n", + "print (data['MSA'].dtype)\n", + "print (data['MSA'].isna().sum())\n", + "# some values have very low counts and 0 has a very high count" + ] + }, + { + "cell_type": "code", + "execution_count": 279, + "id": "62fbf77d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlIAAAGwCAYAAABiu4tnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABTyUlEQVR4nO3deVyVVeI/8M/lrqyXTbiArO6IG6CEuy2uU2o2URnZNPnNacyFqa9l67eZMuc34zSO2zRjOk6l1rhkZSWWoga5AOKGO7IJsl/2y3LP7w/kTgQiPFzuZfm8X6/7Kp57nuec54DcD+c5z3lkQggBIiIiImo3G2s3gIiIiKi7YpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKSiEGKiIiISCKFtRvQkxmNRty8eROOjo6QyWTWbg4RERG1gRACZWVl8Pb2ho1N62NODFKd6ObNm/D19bV2M4iIiEiCzMxM9O3bt9UyDFKdyNHREUDDN8LJycnKrSEiIqK2KC0tha+vr+lzvDUMUp2o8XKek5MTgxQREVE305ZpOZxsTkRERCQRgxQRERGRRAxSRERERBIxSBERERFJZPUgtWHDBgQGBkKj0SAsLAxHjx5ttXxcXBzCwsKg0WgQFBSETZs2NSuza9cuBAcHQ61WIzg4GHv27Gny/pEjR/Dggw/C29sbMpkMe/fubXYMIQTeeusteHt7w9bWFpMnT8b58+c7dK5ERETUs1g1SO3cuRPLli3Dq6++iuTkZEyYMAEzZsxARkZGi+XT0tIwc+ZMTJgwAcnJyVi5ciWWLFmCXbt2mcokJCQgKioK0dHRSElJQXR0NB599FEcP37cVKaiogIjRozAunXr7ti2P/7xj1izZg3WrVuHkydPQqfT4YEHHkBZWZn5OoCIiIi6NZkQQlir8oiICISGhmLjxo2mbUOGDMGcOXOwatWqZuVXrFiBffv2ITU11bRt0aJFSElJQUJCAgAgKioKpaWl+Prrr01lpk+fDhcXF2zfvr3ZMWUyGfbs2YM5c+aYtgkh4O3tjWXLlmHFihUAAIPBAE9PT6xevRrPPfdci+djMBhgMBhMXzeuQ6HX67n8ARERUTdRWloKrVbbps9vq41I1dTUIDExEVOnTm2yferUqYiPj29xn4SEhGblp02bhlOnTqG2trbVMnc6ZkvS0tKQm5vb5DhqtRqTJk1q9TirVq2CVqs1vbiqORERUc9mtSBVUFCA+vp6eHp6Ntnu6emJ3NzcFvfJzc1tsXxdXR0KCgpaLXOnY96pnsb92nOcV155BXq93vTKzMxsc51ERETU/Vh9ZfOfrxoqhGh1JdGWyv98e3uPaa62qdVqqNXqdtdDRERE3ZPVRqTc3d0hl8ubjfDk5eU1GwlqpNPpWiyvUCjg5ubWapk7HfNO9QDo8HGIiIioZ7NakFKpVAgLC0NsbGyT7bGxsRg7dmyL+
0RGRjYrf+DAAYSHh0OpVLZa5k7HbElgYCB0Ol2T49TU1CAuLq5dxyEiIqKezaqX9mJiYhAdHY3w8HBERkbigw8+QEZGBhYtWgSgYc5RdnY2tm3bBqDhDr1169YhJiYGCxcuREJCAjZv3tzkbrylS5di4sSJWL16NWbPno3PP/8cBw8exLFjx0xlysvLcfXqVdPXaWlpOH36NFxdXeHn5weZTIZly5bh3XffxYABAzBgwAC8++67sLOzwxNPPGGh3iEiIqIuT1jZ+vXrhb+/v1CpVCI0NFTExcWZ3luwYIGYNGlSk/KHDx8Wo0aNEiqVSgQEBIiNGzc2O+Znn30mBg0aJJRKpRg8eLDYtWtXk/cPHTokADR7LViwwFTGaDSKN998U+h0OqFWq8XEiRPF2bNn23Vuer1eABB6vb5d+xEREZH1tOfz26rrSPV07VmHgoiIiLqG9nx+W/2uPeoZPjne8mr0P/VEhJ8FWkJERGQ5Vn/WHhEREVF3xSBFREREJBGDFBEREZFEDFJEREREEjFIEREREUnEIEVEREQkEYMUERERkUQMUkREREQSMUgRERERScQgRURERCQRgxQRERGRRAxSRERERBIxSBERERFJxCBFREREJBGDFBEREZFEDFJEREREEjFIEREREUnEIEVEREQkEYMUERERkUQMUkREREQSMUgRERERScQgRURERCQRgxQRERGRRAxSRERERBIxSBERERFJxCBFREREJBGDFBEREZFEDFJEREREEjFIEREREUnEIEVEREQkEYMUERERkUQMUkREREQSMUgRERERScQgRURERCQRgxQRERGRRAxSRERERBIxSBERERFJxCBFREREJBGDFBEREZFEDFJEREREEjFIEREREUnEIEVEREQkEYMUERERkUQMUkREREQSMUgRERERScQgRURERCQRgxQRERGRRAxSRERERBIxSBERERFJxCBFREREJBGDFBEREZFEDFJEREREEjFIEREREUnEIEVEREQkEYMUERERkUQMUkREREQSMUgRERERScQgRURERCQRgxQRERGRRAxSRERERBJZPUht2LABgYGB0Gg0CAsLw9GjR1stHxcXh7CwMGg0GgQFBWHTpk3NyuzatQvBwcFQq9UIDg7Gnj172l1veXk5Fi9ejL59+8LW1hZDhgzBxo0bO3ayRERE1KNYNUjt3LkTy5Ytw6uvvork5GRMmDABM2bMQEZGRovl09LSMHPmTEyYMAHJyclYuXIllixZgl27dpnKJCQkICoqCtHR0UhJSUF0dDQeffRRHD9+vF31Ll++HN988w0++ugjpKamYvny5XjhhRfw+eefd16HEBERUbciE0IIa1UeERGB0NDQJiM9Q4YMwZw5c7Bq1apm5VesWIF9+/YhNTXVtG3RokVISUlBQkICACAqKgqlpaX4+uuvTWWmT58OFxcXbN++vc31hoSEICoqCq+//rqpTFhYGGbOnInf//73LZ6PwWCAwWAwfV1aWgpfX1/o9Xo4OTm1q2+6m0+Otxx+f+qJCD8LtISIiKhjSktLodVq2/T5bbURqZqaGiQmJmLq1KlNtk+dOhXx8fEt7pOQkNCs/LRp03Dq1CnU1ta2WqbxmG2td/z48di3bx+ys7MhhMChQ4dw+fJlTJs27Y7ntGrVKmi1WtPL19f3Lr1ARERE3ZnVglRBQQHq6+vh6enZZLunpydyc3Nb3Cc3N7fF8nV1dSgoKGi1TOMx21rv2rVrERwcjL59+0KlUmH69OnYsGEDxo8ff8dzeuWVV6DX602vzMzMu/QCERERdWcKazdAJpM1+VoI0Wzb3cr/fHtbjnm3MmvXrsWPP/6Iffv2wd/fH0eOHMHzzz8PLy8v3H///S22Ta1WQ61W37HtRERE1LNYLUi5u7tDLpc3G33Ky8trNlrUSKfTtVheoVDAzc2t1TKNx2xLvVVVVVi5ciX27NmDWbNmAQCGDx+O06dP409/+tMdgxQRERH1L
la7tKdSqRAWFobY2Ngm22NjYzF27NgW94mMjGxW/sCBAwgPD4dSqWy1TOMx21JvbW0tamtrYWPTtHvkcjmMRmM7z5SIiIh6Kqte2ouJiUF0dDTCw8MRGRmJDz74ABkZGVi0aBGAhjlH2dnZ2LZtG4CGO/TWrVuHmJgYLFy4EAkJCdi8ebPpbjwAWLp0KSZOnIjVq1dj9uzZ+Pzzz3Hw4EEcO3aszfU6OTlh0qRJeOmll2Brawt/f3/ExcVh27ZtWLNmjQV7iIiIiLoyqwapqKgoFBYW4u2330ZOTg5CQkKwf/9++Pv7AwBycnKarO0UGBiI/fv3Y/ny5Vi/fj28vb2xdu1azJs3z1Rm7Nix2LFjB1577TW8/vrr6NevH3bu3ImIiIg21wsAO3bswCuvvIL58+ejqKgI/v7+eOedd0xhi4iIiMiq60j1dO1Zh6K74zpSRETUU3SLdaSIiIiIujsGKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYqIiIhIIgYpIiIiIokYpIiIiIgkYpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKSiEGKiIiISCIGKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYqIiIhIIgYpIiIiIokYpIiIiIgkYpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKSiEGKiIiISCIGKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYqIiIhIIgYpIiIiIokYpIiIiIgkYpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKSiEGKiIiISCIGKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYqIiIhIIgYpIiIiIokYpIiIiIgkYpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKSiEGKiIiISCIGKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYqIiIhIIgYpIiIiIokYpIiIiIgkYpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKSiEGKiIiISCKrB6kNGzYgMDAQGo0GYWFhOHr0aKvl4+LiEBYWBo1Gg6CgIGzatKlZmV27diE4OBhqtRrBwcHYs2ePpHpTU1Px0EMPQavVwtHREffccw8yMjKknywRERH1KFYNUjt37sSyZcvw6quvIjk5GRMmTMCMGTPuGFbS0tIwc+ZMTJgwAcnJyVi5ciWWLFmCXbt2mcokJCQgKioK0dHRSElJQXR0NB599FEcP368XfVeu3YN48ePx+DBg3H48GGkpKTg9ddfh0aj6bwOISIiom5FJoQQ1qo8IiICoaGh2Lhxo2nbkCFDMGfOHKxatapZ+RUrVmDfvn1ITU01bVu0aBFSUlKQkJAAAIiKikJpaSm+/vprU5np06fDxcUF27dvb3O9jz32GJRKJf7973+3+XwMBgMMBoPp69LSUvj6+kKv18PJyanNx+mOPjl+95G6JyL8LNASIiKijiktLYVWq23T57fVRqRqamqQmJiIqVOnNtk+depUxMfHt7hPQkJCs/LTpk3DqVOnUFtb22qZxmO2pV6j0YivvvoKAwcOxLRp0+Dh4YGIiAjs3bu31XNatWoVtFqt6eXr69t6JxAREVG3ZrUgVVBQgPr6enh6ejbZ7unpidzc3Bb3yc3NbbF8XV0dCgoKWi3TeMy21JuXl4fy8nK89957mD59Og4cOIC5c+fi4YcfRlxc3B3P6ZVXXoFerze9MjMz29ATRERE1F0prN0AmUzW5GshRLNtdyv/8+1tOWZrZYxGIwBg9uzZWL58OQBg5MiRiI+Px6ZNmzBp0qQW26ZWq6FWq+/YdiIiIupZrDYi5e7uDrlc3mz0KS8vr9loUSOdTtdieYVCATc3t1bLNB6zLfW6u7tDoVAgODi4SZkhQ4bwrj0iIiIysVqQUqlUCAsLQ2xsbJPtsbGxGDt2bIv7REZGNit/4MABhIeHQ6lUtlqm8Zhtq
VelUmH06NG4dOlSkzKXL1+Gv79/O8+UiIiIeiqrXtqLiYlBdHQ0wsPDERkZiQ8++AAZGRlYtGgRgIY5R9nZ2di2bRuAhjv01q1bh5iYGCxcuBAJCQnYvHmz6W48AFi6dCkmTpyI1atXY/bs2fj8889x8OBBHDt2rM31AsBLL72EqKgoTJw4EVOmTME333yDL774AocPH7ZM5xAREVGXZ9UgFRUVhcLCQrz99tvIyclBSEgI9u/fbxr1ycnJaXIpLTAwEPv378fy5cuxfv16eHt7Y+3atZg3b56pzNixY7Fjxw689tpreP3119GvXz/s3LkTERERba4XAObOnYtNmzZh1apVWLJkCQYNGoRdu3Zh/PjxFugZIiIi6g6suo5UT9eedSi6O64jRUREPUWnryOVlpYmqWFEREREPYmkINW/f39MmTIFH330Eaqrq83dJiIiIqJuQVKQSklJwahRo/C73/0OOp0Ozz33HE6cOGHuthERERF1aZKCVEhICNasWYPs7Gxs2bIFubm5GD9+PIYOHYo1a9YgPz/f3O0kIiIi6nI6tI6UQqHA3Llz8emnn2L16tW4du0aXnzxRfTt2xdPPfUUcnJyzNVOIiIioi6nQ0Hq1KlTeP755+Hl5YU1a9bgxRdfxLVr1/D9998jOzsbs2fPNlc7iYiIiLocSetIrVmzBlu2bMGlS5cwc+ZMbNu2DTNnzoSNTUMuCwwMxN///ncMHjzYrI0lIiIi6kokBamNGzfimWeewa9+9SvodLoWy/j5+WHz5s0dahwRERFRVyYpSMXGxsLPz880AtVICIHMzEz4+flBpVJhwYIFZmkkERERUVckaY5Uv379UFBQ0Gx7UVERAgMDO9woIiIiou5AUpC601NlysvLodFoOtQgIiIiou6iXZf2YmJiAAAymQxvvPEG7OzsTO/V19fj+PHjGDlypFkbSERERNRVtStIJScnA2gYkTp79ixUKpXpPZVKhREjRuDFF180bwuJiIiIuqh2BalDhw4BAH71q1/hr3/9612fiExERETUk0m6a2/Lli3mbgcRERFRt9PmIPXwww9j69atcHJywsMPP9xq2d27d3e4YURERERdXZuDlFarhUwmM/0/ERERUW/X5iD108t5vLRHREREJHEdqaqqKlRWVpq+Tk9Px/vvv48DBw6YrWFEREREXZ2kIDV79mxs27YNAFBSUoIxY8bgz3/+M2bPno2NGzeatYFEREREXZWkIJWUlIQJEyYAAP7zn/9Ap9MhPT0d27Ztw9q1a83aQCIiIqKuSlKQqqyshKOjIwDgwIEDePjhh2FjY4N77rkH6enpZm0gERERUVclKUj1798fe/fuRWZmJr799ltMnToVAJCXl8dFOomIiKjXkBSk3njjDbz44osICAhAREQEIiMjATSMTo0aNcqsDSQiIiLqqiStbP7II49g/PjxyMnJwYgRI0zb77vvPsydO9dsjSMiIiLqyiQFKQDQ6XTQ6XRNto0ZM6bDDSIiIiLqLiQFqYqKCrz33nv47rvvkJeXB6PR2OT969evm6VxRERERF2ZpCD17LPPIi4uDtHR0fDy8jI9OoaIiIioN5EUpL7++mt89dVXGDdunLnbQ0RERNRtSLprz8XFBa6uruZuCxEREVG3IilI/f73v8cbb7zR5Hl7RERERL2NpEt7f/7zn3Ht2jV4enoiICAASqWyyftJSUlmaRwRERFRVyYpSM2ZM8fMzSAiIiLqfiQFqTfffNPc7SAiIiLqdiTNkQKAkpIS/POf/8Qrr7yCoqIiAA2X9LKzs83WOCIiIqKuTNKI1JkzZ3D//fdDq9Xixo0bWLhwIVxdXbFnzx6kp6dj27Zt5m4nERERUZcjaUQqJiYGTz/9NK5cuQKNRmPaPmPGDBw5csRsjSMiIiLqyiQFqZMnT+K5555rtt3Hxwe5ubkdbhQRERFRdyApSGk0GpSWljbbfunSJfTp06fDjSIiIiLqDiQFqdmzZ+Ptt
99GbW0tAEAmkyEjIwMvv/wy5s2bZ9YGEhEREXVVkoLUn/70J+Tn58PDwwNVVVWYNGkS+vfvD0dHR7zzzjvmbiMRERFRlyTprj0nJyccO3YMhw4dQmJiIoxGI0JDQ3H//febu31EREREXVa7g5TRaMTWrVuxe/du3LhxAzKZDIGBgdDpdBBCQCaTdUY7iYiIiLqcdl3aE0LgoYcewrPPPovs7GwMGzYMQ4cORXp6Op5++mnMnTu3s9pJRERE1OW0a0Rq69atOHLkCL777jtMmTKlyXvff/895syZg23btuGpp54yayOJiIiIuqJ2jUht374dK1eubBaiAODee+/Fyy+/jI8//thsjSMiIiLqytoVpM6cOYPp06ff8f0ZM2YgJSWlw40iIiIi6g7aFaSKiorg6el5x/c9PT1RXFzc4UYRERERdQftClL19fVQKO48rUoul6Ourq7DjSIiIiLqDto12VwIgaeffhpqtbrF9w0Gg1kaRURERNQdtCtILViw4K5leMceERER9RbtClJbtmzprHYQERERdTuSnrVHRERERAxSRERERJIxSBERERFJxCBFREREJBGDFBEREZFEDFJEREREEjFIEREREUnEIEVEREQkkdWD1IYNGxAYGAiNRoOwsDAcPXq01fJxcXEICwuDRqNBUFAQNm3a1KzMrl27EBwcDLVajeDgYOzZs6dD9T733HOQyWR4//33231+RERE1HNZNUjt3LkTy5Ytw6uvvork5GRMmDABM2bMQEZGRovl09LSMHPmTEyYMAHJyclYuXIllixZgl27dpnKJCQkICoqCtHR0UhJSUF0dDQeffRRHD9+XFK9e/fuxfHjx+Ht7W3+DiAiIqJuTSaEENaqPCIiAqGhodi4caNp25AhQzBnzhysWrWqWfkVK1Zg3759SE1NNW1btGgRUlJSkJCQAACIiopCaWkpvv76a1OZ6dOnw8XFBdu3b29XvdnZ2YiIiMC3336LWbNmYdmyZVi2bFmbz6+0tBRarRZ6vR5OTk5t3q87+uR4y+H3p56I8LNAS4iIiDqmPZ/fVhuRqqmpQWJiIqZOndpk+9SpUxEfH9/iPgkJCc3KT5s2DadOnUJtbW2rZRqP2dZ6jUYjoqOj8dJLL2Ho0KFtOieDwYDS0tImLyIiIuq5rBakCgoKUF9fD09PzybbPT09kZub2+I+ubm5LZavq6tDQUFBq2Uaj9nWelevXg2FQoElS5a0+ZxWrVoFrVZrevn6+rZ5XyIiIup+rD7ZXCaTNflaCNFs293K/3x7W47ZWpnExET89a9/xdatW1tty8+98sor0Ov1pldmZmab9yUiIqLux2pByt3dHXK5vNnoU15eXrPRokY6na7F8gqFAm5ubq2WaTxmW+o9evQo8vLy4OfnB4VCAYVCgfT0dPzud79DQEDAHc9JrVbDycmpyYuIiIh6LqsFKZVKhbCwMMTGxjbZHhsbi7Fjx7a4T2RkZLPyBw4cQHh4OJRKZatlGo/Zlnqjo6Nx5swZnD592vTy9vbGSy+9hG+//Vb6SRMREVGPorBm5TExMYiOjkZ4eDgiIyPxwQcfICMjA4sWLQLQcKksOzsb27ZtA9Bwh966desQExODhQsXIiEhAZs3bzbdjQcAS5cuxcSJE7F69WrMnj0bn3/+OQ4ePIhjx461uV43NzfTCFcjpVIJnU6HQYMGdXa3EBERUTdh1SAVFRWFwsJCvP3228jJyUFISAj2798Pf39/AEBOTk6TtZ0CAwOxf/9+LF++HOvXr4e3tzfWrl2LefPmmcqMHTsWO3bswGuvvYbXX38d/fr1w86dOxEREdHmeomIiIjawqrrSPV0XEeqKa4jRURE3UG3WEeKiIiIqLtjkCIiIiKSiEGKiIiISCIGKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYqIiIhIIgYpIiIiIokYpIiIiIgkYpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKSi
EGKiIiISCIGKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYo6TF9Vi6/P5iA5o9jaTSEiIrIohbUbQN1bak4pFn2UiPTCSgDArdJqTB2qg41MZuWWERERdT6OSJFkSRnFeHhDPNILK2GvkgMAjlwpwK7ELAghrNw6IiKizscgRZKt/e4KqmrrERnkhuX3D8QjoX1hIwOSM0twNb/c2s0jIiLqdAxSJElmUSXiLucDAFY9PAx2agVC/V0wJtANAPDj9SJrNo+IiMgiGKRIko+PZ0AIYMIAdwS425u2RwS6AgAu5pSipLLGWs0jIiKyCAYpajdDXT0+O5UJAJgf4d/kPU8nDYLc7SEAnEjjqBQREfVsDFLUbt+cy0VhRQ10ThrcP8Sj2fv3BDVc3jt5owh19UZLN4+IiMhiGKSo3f6TmAUAeGyMLxTy5j9CQ7yc4KRRoKKmHhdySi3dPCIiIothkKJ2MdTVmy7ZzRrm1WIZuY0MI3ydAQCXcsss1TQiIiKLY5CidknOKIGhzog+jmr093C4Y7kBHo4AgCt55TByTSkiIuqhGKSoXeKvFQIAIoPcIGtl9fIANzso5TKUG+qQq6+2VPOIiIgsikGK2iXhWgEAILKfW6vlFHIbBLk3jFhdyePinERE1DMxSFGbVdbU4XRmCQBg7F2CFAAM8LwdpG5xnhQREfVMDFLUZqduFKO2XsBbq4Gfq91dyw/0bJgnlV5YCUNdfWc3j4iIyOIYpKjNEq43zI+6p1/r86Maudmr4GKnRL0QuJ5f0dnNIyIisjgGKWqzxonmY/u5t6m8TCbDAM/Gu/d4eY+IiHoeBilqk8qaOpzL1gO4+0Tzn+rfp2Ge1I2Cyk5pFxERkTUxSFGbXLhZinqjQB9HNXycbdu8n59bw1yqW6XVKK2u7azmERERWQWDFLXJ2dujUcN8tO3az0mjhIudEgLA6YwS8zeMiIjIihikqE0ag1RIO4MUAPi72QMATqUXm7VNRERE1sYgRW1yTuKIFADTUglJDFJERNTDMEjRXVXW1OHq7dXJpQQp/9vzpJIzilFXbzRr24iIiKyJQYruKjWnFEYBuDuo4emkbvf+nk4aqBU2qKipxyWuck5ERD0IgxTd1dmsxst6Tm1aiPPnbGQyXt4jIqIeiUGK7upsdikAaZf1GjUug8AJ50RE1JMwSNFdnb8p/Y69Rv6ut+/cu8EgRUREPQeDFLWqurYeVxonmveVHqR8XWwhkwHZJVXILzOYq3lERERWxSBFrbqQ07CiubuDCjonjeTjqJVyDPBoeFzMmawSM7WOiIjIuhikqFUXcxrushviJW2i+U+N6OsMADidWdLBVhEREXUNDFLUqsu3lysYrHPs8LFG+DoDYJAiIqKeQ2HtBlDXdjG34Y69gZ4dD1IjbweplMwSCCE6PMJlaZ8cz7hrmSci/CzQEiIi6io4IkV3JITApdzGESmnDh9vkM4RaoUNSqvrkFZQ0eHjERERWRuDFN1RQXkNiitrIZMB/W9PFO8IpdzGtIRCCiecExFRD8AgRXfUOBrl72oHW5XcLMdsnHCekqk3y/GIiIisiUGK7qjxuXiDzDDRvNFIP2cAQDInnBMRUQ/AyeZ0R5dvj0gNMsNE80Yjb49Ipd4shaGuHmqFeUa6iMyNNxcQUVtwRIru6OLtEamBZhyR8nW1hYudEjX1RtMaVURERN0VgxS1yGgUuGLGNaQayWQyridFREQ9BoMUtSi7pAqVNfVQyW3g72Zv1mP/dD0pIiKi7oxBilp08fb8qKA+9lDKzftjYhqR4hIIRETUzTFIUYvM+WiYn2tcAuF6fgX0lbVmPz4REZGlMEhRixrXkDLnRPNGrvYq+LnaAQDOZJeY/fhERESWYvUgtWHDBgQGBkKj0SAsLAxHjx5ttXxcXBzCwsKg0WgQFBSETZs2NSuza9cuBAcHQ61WIzg4GHv27GlXvbW1tVixYgWGDRsGe3t7eHt746mnnsLNmzc7fsLdxH8fD
WP+IAVwnhQREfUMVg1SO3fuxLJly/Dqq68iOTkZEyZMwIwZM5CR0fL6LWlpaZg5cyYmTJiA5ORkrFy5EkuWLMGuXbtMZRISEhAVFYXo6GikpKQgOjoajz76KI4fP97meisrK5GUlITXX38dSUlJ2L17Ny5fvoyHHnqoczuki6ipM+JafjkA8zysuCX/vXOPK5wTEVH3JRNCCGtVHhERgdDQUGzcuNG0bciQIZgzZw5WrVrVrPyKFSuwb98+pKammrYtWrQIKSkpSEhIAABERUWhtLQUX3/9tanM9OnT4eLigu3bt0uqFwBOnjyJMWPGID09HX5+bVuEr7S0FFqtFnq9Hk5OHX/or6VcvlWGqX85Age1AmffmgqZTHbXfdq7eGFiehHmbUyAu4MaJ1+9r011WBsXaOxd+P0m6r3a8/lttRGpmpoaJCYmYurUqU22T506FfHx8S3uk5CQ0Kz8tGnTcOrUKdTW1rZapvGYUuoFAL1eD5lMBmdn5zuWMRgMKC0tbfLqjhrv2Bvo6dBpAWeotxYKGxkKyg24qa/ulDqIiIg6m9WCVEFBAerr6+Hp6dlku6enJ3Jzc1vcJzc3t8XydXV1KCgoaLVM4zGl1FtdXY2XX34ZTzzxRKvJdNWqVdBqtaaXr6/vHct2ZaZHw3TS/CgA0CjlGOzVcHzOkyIiou7K6pPNfz7iIYRodRSkpfI/396WY7a13traWjz22GMwGo3YsGFDK2cCvPLKK9Dr9aZXZmZmq+W7qsaHFXfW/KhGjRPOk9KLO7UeIiKizmK1IOXu7g65XN5sFCgvL6/ZaFEjnU7XYnmFQgE3N7dWyzQesz311tbW4tFHH0VaWhpiY2Pvep1UrVbDycmpyas7umSBESkACPN3AQAkZjBIERFR92S1IKVSqRAWFobY2Ngm22NjYzF27NgW94mMjGxW/sCBAwgPD4dSqWy1TOMx21pvY4i6cuUKDh48aApqPV1lTR0yiioBAIM6eUQqzM8VAHAuW4/q2vpOrYuIiKgzKKxZeUxMDKKjoxEeHo7IyEh88MEHyMjIwKJFiwA0XCrLzs7Gtm3bADTcobdu3TrExMRg4cKFSEhIwObNm0134wHA0qVLMXHiRKxevRqzZ8/G559/joMHD+LYsWNtrreurg6PPPIIkpKS8OWXX6K+vt40guXq6gqVSmWpLrK4K7calj1wd1DBzUHdqXX5utrC3UGNgnIDzmbrMTrAtVPrIyIiMjerBqmoqCgUFhbi7bffRk5ODkJCQrB//374+/sDAHJycpqsKRUYGIj9+/dj+fLlWL9+Pby9vbF27VrMmzfPVGbs2LHYsWMHXnvtNbz++uvo168fdu7ciYiIiDbXm5WVhX379gEARo4c2aTNhw4dwuTJkzupR6zPUpf1gIZ5auH+LvjmfC4S04sZpIiIqNuxapACgOeffx7PP/98i+9t3bq12bZJkyYhKSmp1WM+8sgjeOSRRyTXGxAQACsur2VVlppo3ijsdpA6daMYmGSRKomIiMzG6kGKupbGhxV39vyoRmEBDRPOkzKK73rHJhFRe3FhVepsVl/+gLqWixa8tAcAQ72doFLYoKiiBmkFFRapk4iIyFwYpMikqKIG+WUGAMAAC41IqRVyjOirBQAkcj0pIiLqZhikyKTxsl5fF1s4qC131Te0cT0pBikiIupmGKTIpPGOvcEWuqzXaLR/w916J9KKLFovERFRRzFIkYml79hrNCbIFTYy4HpBBXL0VRatm4iIqCMYpMjEkmtI/ZSTRolhfZ0BAPFXCy1aNxERUUdw+QMC0PDQ5stWClIAMLafG1IySxB/rRDzwvpavH6i3ohLAxB1HEekCACQo69GmaEOChsZgtwdLF7/2H4NzzJMuFbQaxdDJSKi7odBigD897JeUB97qBSW/7EI93eFSm6Dm/pq3CistHj9REREUjBIEQDrTTRvZKuSY5SfMwAg/lqBVdpARETUXpwjRQDw3/lRVgpSADC2n
zuOpxUh/loh5kf4W60dXUVb5q8AnMNCRGRNDFIEwPKPhmnJuP5u+MtBIOFaIYxGARsbPnePegZO6ibquXhpj1BXb8TV/HIA1g1SI3yd4ahRoKiiBqezSqzWDiIiorZikCKkFVSgps4IO5Ucvi52VmuHUm6DyYM8AAAHL9yyWjuIiIjaikGKcP5mKQBgiJeT1S+n3T/kdpBKZZAiIqKuj3OkCOey9QCAEG8nK7cEmDzQAwobGS7fKkd6YQX83eyt3aQOqaypQ46+GlU19RCi4YHQznZKyGQ9f/4X5wWRNVXX1iO/zICbJVVw1Chgr1bAphf8uyPLY5Ai04jUUG+tlVsCaO2UGBPoivhrhYi9cAvPTgiydpParbiiBh/9mI64y/k4nVmCOmPTBUadNAr4ONtioM4RQ7yc4KRRWqmlRD2H0Sjw4/VCHEzNw+HLebieX9HkfYWNDP09HBDq54LBXo5Q2PCCDJkHg1QvJ4TA+ZsNI1JDfaw/IgUALnYqAMDHxzNgp2r5R7QrjmQY6urxp28vYcsPaaioqTdtd1QrYKeWwyiA/DIDSqvrUJpbhtTcMnyRchPB3lpEBrkh0L17j74R3U1nLOlRWVOHHScy8e8f05FW0DQ8qRQ2UMltUGGoQ51R4GJuGS7mlsHZTom5I30wwIrLvVDPwSDVy2UVV6G0ug5KuQwDPLrGL5UhXk746mwO0gsrUFlTd8cw1ZXcKq3GJycykF9mAAAEezkhOtIf4/u7w9f1vxP4q2rqcSWvDOu+v4rUnFJkFlfhXLYe57L16NfHHlODdU3KmwvXpKKexlBXj+3HM7Du0DUUlDf8u3NUKzBjmA5TBnlgTKArXO1V2H4iE/VGgfxyA1IyS5CUXoySylpsib+BMD8XPDTS28pnQt1d1/+Eok7VOBo10NPRKo+GaYmrvQo6Jw1yS6tx/mYpRge4WrtJrTqXrcd/ErNQU2+Ep5Ma//fQUEwN1rU4cd9WJcfwvs6YPMgDkwd5IEdfhR+vFyEpvRjX8iuwMe4aQv1cMDNEBzs1/3kSteTYlQIs3ZGMwooaAA2/M8b3d8coP2eoFXIUV9bi2/P/vWFFbiODzkkD3VAdJg/qgwMXbuHHa4VIzChGQYUBdfUCtir5XevlHxrUkq7xyUlWcy67YX5USBeYH/VTI32dAQBJGcXWbchdnM3WY8fJDNTUGxHUxx5fLZmA6SFebb770Utri7mjfBDzwECE+jlDhoZzXnPwMlK4lhZRE3ll1Vi6IxlPbj6OwooaOKgVmD3SG8vuH4B7gtygVtw9DKkVcjw43Bu/Hh8IjdIG6YWV+MfR6yirrrXAGVBPxCDVy3W1+VGNRvo2hIr0wkoU3h6272rO39Rj58kMGAUQ6ueMZ8YFwt1BLelYLvYqPBLmi+cmBsHTSY3KmnrsPJmJz05lorq2/u4HIOrBjEaBf/+Yjvv+HIfPT9+ETAbcE+SGmAcGIiLQTdLE8aA+Dlg4IQiOagVyS6vxr/gbMNTx3xq1H4NUL9eV7tj7KSdbJfp7OAAAkjJKrNuYFmQUVWLHyUwYRUPoezi0r1lurfZzs8fiKQNw72APyAAkZ5Zg3aGryC6p6nijibqhHH0Voj88jtf3nkNZdR2G+Wjx+W/H4aER3tAo7z4C1RovrS3+Z2IQ7FVy3NRXY/uJDNT/7C5borthkOrF8sqqkVdmgEwGDPHqGhPNfyrUzwUAkJxZDKPoOr/c9FW1+PjHdNQbBYK9nPBImHlCVCO5jQz3D/HE/0wMgrOdEkUVNfh73DUcTyuE6EL9QNTZ9qXcxLS/HMEPVwuhUdrgzQeDsfe34zC8r7PZ6nBzUOOpyAAo5Q3r13155qbZjk29A2ez9mKNo1H9+jh0yTvjgr2doFbYoKSyFmkFFejXx8HaTUJtvREfH09HmaEOOicNfhlu3hD1U/5u9nhhygB8lpiJi7ll+Pz0TdwoqMCcUT5tmgtC0uSXGXDqRhH2n82BvqoWMhmgtLGBl
7MGfZ1t4eNiBzkfqN2p9FW1eOPzc/j8dEOoGdFXizVRIzvtd4Cvqx2iwv3w8fF0HE8rgr+bvWmeJtHddL1PT7KY5NuXzIb7dK3Leo2UchsM7+uMkzeKcPx6YZcIUt+cy0VWcRVslXI8eY9/pwcaW5Uc0ff44+iVAhy4kIuULD1ullTjiQg/eDppOrXu3iY5oxj/PJaGb87ltnx55/YKEvZqBUb01SLM38WyDewlruWXY/r7R5Cjr4bcRobfTumPF+7tD6W8cy+gBHs7YfKgPjh0KR97k7PhrdXAg//GqA0YpHqxpPSGO+JGdeEPhMh+bjh5owjnb5aioNwgeTK3OVzMKUXC9UIAQNRoX7jaqyxSr0wmw8SBfeDnaocdJzOQX27AhsNXMWekD0b5dd3vXXeRo6/C219cwNfnck3bBusc4WSrRJ/bP2/VtfXILqlCemElKgx1iL9WiPhrhUhML8azEwIxeaCH1Z9T2d3V1hsRe+EWjl0tAAAEuNlhTdRI0yV+S7hviCfSiypxPb8Cn5zIwPOT+3eZZWGo62KQ6qXqjQKnM0sAAGFd+MNY56TBIE9HXLpVhmNXCjBnlI9V2pFXWo3/JGUBAMb1c8NAK6yIHOBuj8X3DsCnJzNxNb8cnyVmIa2gAg+H+nR40m1vJITAR8cz8N7+VFTU1ENuI8PcUT54Zlwggr2dWlzEtN4ocCWvDEnpxbiQU2oKVP362OPX44O63fei3iiQVVyJK3nlKK6oQXWdERAC7g5qeDhpMMDDMqPAWcWV+Cwxy7Sg7eNj/PDarCGwt/BaajYyGaLCfbHu+6vIKzNg/9kcq/3Ooe6DQaqXupJXhnJDHexVcgzSdb2J5j81cWAfXLpVhqSMYtw3xAOOFn42ndEoEPNpCipr6uGl1WDaUJ1F6/8pB7UCT48LwKGLefj+Yh5OpRdj7oZ4bJgf2iUfMSOEQL1RoLZeQG4jg1Iu6xIPbC6prMH//ucMDlxoWLQxzN8Ff5gTgiFerS8DIreRYbDOCYN1TiiprEFxZQ12nMjEtfwKrNxzFv/v24uIvscfT0b6w8Ox614WKq2uxb9+uIGNcddQWdPSLf9lAAAZgO8v5uHhUB/MGWX+kFhnNOLwpXwcvpQHo2hYmXzuKB+8PSfErPW0h6NGiUfC+2LLDzdw4kYR+ns4IKSLTn+groFBqpdKvH1Zb6Sfc5efOBvgZgdfF1tkFlch/lqhxYPMP49dx7GrBVDKZYga7QtFJ8/VuBsbmQz3DfGEn5sdPj2ZidScUsz46xH8ZlJ/PDcpqNNHRIQQKK6sRWZRJbKKq5BVXIm8MgPyygzIL6tGfpkBJZW1KKuuQ229ET+dbSQDYKeSw9lOBRc7JYoqDBjqo0W4v4vFAvLx64VYtvM0cvTVUMltsGLGYPxqbEC7L80526nw/JT+WHLfAHx6KgtbfkhDVnEV1n5/FZvirmP2SG8sGBvQpT6E640CW+Nv4P2Dl1FWXQcA0ChtMMDDET7OtlArbSBuPxMyq7gSmcVVOHGjCCduFGH1NxcxP8If0ZH+Zpmfd7OkCruTsnBTXw0AGOajxewR3l1iRf8BHo6YMMAdR68UYE9yNvq62MLZzjKX8qn7sf5PLFlFUnoJAFh0/oFUMpkMkwZ64KPj6Yi/VoB7gtwsVvfZLD3+37eXAAC/GObdpUYZBng4YvG9A3Dkcj4SrhfiLwcvY+fJDDwzPhCPjvaFUweCib6yFpnF/w1KWcVVTYJTRYujGHcnAFTU1KOipgrZJVU4d/vOUbmNDCN9nXHfEA88ONy7U543WFdvxN++v4q/fX8FRgEEutvjb4+P6nDQcdQo8evxgVgQ6Y8DF27hn0evIymjBJ8lZuGzxCwM1jkiwM0eI3ydobW17GjqT125VYaX/nPGdEl/gIcDRvm5YJiP9o5/TJVU1kBuI8O2hHRkl1Rh3aGr2BR3Db8Y7oVfjQvECAl3tpUb6nDwwi2cvFEEAcBWKcfskd5mXdLAH
B4I9sT1/Apkl1Ths8Qs/Hp8oLWbRF0Ug1Qv1fjole4QpICGda78Xe2QXlSJr8/l4DeT+3V6nRWGOizZkYzaeoEZITqEB3S9vtLaKvHJwgh8eSYHq/an4qa+Gn/4KhVrYi9j4oA+mDDQHcN8tKgw1MFOJYdMJkO9UaCypg7lhjqUVNaiuLIGxRU1KK6sxb9/TEdWcaVptKI1nk5q9HWxQ18XW+i0GvRxUKOPoxoejhq42qsQe+EWFHIZlDY2UMplqBcChjojKm7XW1BugFphg9OZJbhRWInE9GIkphfjj99cQpi/C2aP9MbMYV5mucHgal4ZfvdpClKyGlbynxfaF2/PHmrWOTgKuQ1mDvPCzGFeSEwvxoc/pCH2/C1czC3DxdwyfHs+F/36OGCknzOGejtZbAkLIQS2n8jE/31xHoY6IxzVCrw8czAeH+2HHSczW93X2U6FJyL88OvxgTiYegsfHmu43LX39E3sPX0TYf4ueGZcIKYN9bzrSG12SRW+PHMTJ28Uoba+YZxyeF8tZg7z6lDo7ywKGxtEjW6YL5VWUIHDl/Lx5D3+Zjt+Wx4kzmf7dQ8MUr1QUUUN0goqAACj/Jyt25g2kslkeHCEN9YfuoozWXqcSCvCmMDOfZjxW/vOI62gAl5aDVY9PAz7z+befScraOybB4I9sSc5G1t+SMPlW+X45nwuvjkvvc3uDipTUPJ1bfhvX5eGy6zezrZ3vYTYePm4kQINzzlz0ijhpbUF8N8PiqziSsRdzsdXZ3KQcL3QFKr+74sLmDSwDx4O9cH9Qzzbfdmy3FCHD+KuYdOR66ipM8JRo8Af5oRg9sjOnUAc5u+CMH8X6Ctr8dXZHPz9yDWkF1bian45ruaX4/PTMgz11mKkrzP69XHotMvr+qparNx9Fl+dzQHQMN/wj/OGQ6dt38iqQm6D6SFemB7ihbNZemz5IQ1fnLlp+j55azV4cKQ3RvZ1Rn8PB2iUchiFwI3CSqTmlCL2wq0mPw8+zraYOcyrS87r+yl3BzUeGuGN/yRl4fuLDefAZS/o5xikeqHGZQ/69bHvVtf9vZ1tER7gipM3ivDmvvPYt3hcp60t80XKTXyWmAWZDHg/amS36CeNUo7Hx/jhsdG+OJOlx9Er+Th2tQDX8yuQV9b0eYU/n6vkYqeCi70Kc0f5mAKTrcpyd5/1dbHD/Ah/zI/wx63SanyRchP7Um7iTJYe39+eWO+oVmDWcC/MGu6F0QGurYaqXH01didn4cNjaSgorwEATBrYB6slhIiO0NopTWGxqKIGpzOLkZxRgsKKGpzOLMHpzBI4qhUY4euMkb7OCPY23zMvkzOK8cL2ZGQVV0FhI8P/Th+EZ8cHdXiZhmG3F8d8ecZgfHQ8Ax//mI6b+mr8Pe76XfcN6mOPSQP6oL+HQ5e46aAtRvk543JeGc5k6bF0RzL2L53QJUfQyHoYpHqhxNuX9brjX1YPBHviXLYeqTml+NO3l/DKzCFmryOtoOEOLABYPKU/Iiw4J8scZDIZRvg6Y4SvMxbfOwAAbj+Q1QghBGxkMtiq5C2uyD5lsIelm9uMp5MGz04IwrMTgnAtvxx7krLx0fF0lFTWYsfJTOw4mQmlXAZfVzt4Omrg5qCC3EaGUD8XpBVU4PxNPRLTi9G4pmaguz1WTB+EaUN1Vv3wdrVX4d7BnpgyyANZxVVIzizGmSw9ygx1OHa1ADPXHsXogIZLZQ8E3/1S2Z3U1Bmx7tBVbDh0FXVGAV9XW/zt8VCzr9Tt4aRBzAMD8fzkfvj6XA5OpBXjTFYJsoqrYKirhxCAn6sdgvrYY2w/d0wbqsP3F/PM2gZLkMlkmDPSxzRH8LU95/DXx0Z2myBInY9BqheKv9awqGR4QOdeGusMDmoFHg71wcfHM/D3I9dxT5CbWT/8yw11WLjtFMqq6xDm74Kl9w0w27GtSSm36fSVoTtDvz4OeHHaIOi0GtworMDpjBJcvlWG0uo6XM+vwPX8ClPZx
seJNBod4IJfhvlibqhPlzp3mawhBPq62mHmMC9cuVWO5IxiXLpVhpM3inHyRjF8nG3x9NgAPDrat10T1BOuFeKtfedx6VbD8gW/GO6Fdx8e1qkjKBqlHHNH9cXcUX07rQ5r0yjliBrth38cvY59KTcxYYA7fhnua+1mURfBINXLFFfU4ExWCQBg4oA+1m2MREO9tXh6bAC2xt9AzKensW/xeLPc5WU0CsTsPI2reeXwdFJj4/xQqy91YGlddQKsjUyGIHcHBLk7QAiBW2UGZBVVIr/cgOKKGggAvrfncw32csLoABf4u3Xt+TdAw4TmIV5OGOLlhPuGeOCjH9Px8fEMZJdU4Z39qfjTgUuYNlSHuaN8cE+QW4uXWw119Yi/WogPjlw3rbzvZq/C27NDMGu4V4fb2JafCaDnT4z2c7XD8vsH4E8HLuO1vefQ//Zdj0QMUr3MD9cKIAQw0NPBonNFzO2VmYNxKr0I57JLMf+fx/Hpc5EdOh8hBN7+8gIOXLgFldwGm54M43O2uiiZTAadkwa6n31/uvsHuaeTBr+bOgi/ndIfn5/OxpYfbuBibhn23Z4vppLbYKSvM/q62sLdQY2y6lrk6qtx8kYxyg0Nd1kq5TI8NtoPyx8YaLFHGPUmv5ncH6czS3AwNQ8Lt53C3t+OQ18X8y/VQd0Lg1Qvc/Ryw3OsuutoVCO1Qo7NC0bjl5sSkFFUifn//BE7/icSfRzbf6u8EAKrv7mErfE3AACrHxnGvzQtpK2jHb1J42WkR8N9cTZbj89OZeG71Fu4qa++vThm8308ndSYEeKFhROD4ONsy37tJHIbGf762Cg8sikBqTmleGbrSWxfeA/crPgMULI+BqleRAiBI1fyAQATBnbvIAU0/AX/8bMRiPp7Aq7lV2DO+h+w8cnQdi3sV1dvxLv7L+LDH9IAAO/MDenRcz2o+5DJZBje1xnD+zrj7dlDcb2gAimZJbhVakBBuQGOGgU8HDUI9nbCcB9tj3poclcOgvZqBTYvCMec9T/g8q1yPP6PH/HRsxFdarFesiwGqV7kWn55w2MxFDYY0w0nmrfE19UOHy+8B89sPYm0ggo8sikBK2cMxpP3+N91flN+mQGLP0nC8bQiAMAbvwjG/AjzLbhH1B5tDQ+WWIyWWuftbItPFt6D+f/8EZdvleOxD37EhwtGI6CLr4tFnYNBqhc5cvuyXkSgq0XXCOpsge722Pvbcfjdp6dxMDUPb31xAf/+MR3LHxiI+wZ7NjtXfWUtNv+Qhi3H0lBmqIODWoE//XIEpodY72HE3UlXHS3gpGiypP4eDvj0uUg88Y/juJ5fgV/87Rjemdv5i71S18Mg1YuYLusNcLdyS8xPa6vEB9Hh+PeP6Xj/4GVcy6/A4k+SoVHaYEygG9ztVVDKbZCaW4oLN0tRd3uRoaHeTlj7+Cj06+Ng5TMgou7G380eu34zFku2J+PEjSIs3XEae5OzsfT+gWZft4u6LgapXkJfVYv4qw23Rk8aaP1FFzuDjY0MC8YGYG6oDz6Iu449ydnILqnCkcv5zcoO1jli6X0DMG2orkfNLSEiy9JpNfhkYQTWfn8V676/gkOX8nHoUj5C/ZwxZZAHxgS6wsfFFn0c1ZBBBqMQKDfUoaDcgOraelTV1qO61ojq2nrT14ZaI2xkQI6+ClpbJTycNPBx1mCgpyMcuap6l8Mg1Ut8ey4XNfVGDPR0wEDPnj364qRR4sVpg/C7qQORmlOG5MxilFfXobKmHv08HDDK1xl9XWy5MjERmYVCboOYBwZi7igfrPv+KvaezkZSRgmSMko6dNwfbi+e/FO+rrYYomtYeyzM3wVjAlt/XBJ1PgapXmLv6WwAwOyRPr0mQMhkMgR7O5n1+WVE1LKuOnfOkgLd7fHnR0fgxWkDcehiPg5fysPF3DLk6qtRU29sUlatsIGtUg6NUg6N0gYapRy2SjnUSjk0ChsYBRDobofiylrcKq1GRlElcvTVyCyqQ
mZRFQ5cuAUAppuHxvV3x8SB7gj2cuo1v+O7CgapXiBXX21a8fihEd5Wbo1lddWVuomo5/LS2uKJCD/T7xajUaDs9qKpNjLATqXAzpOZdz3Oz383FVfUIDW3FKk5ZTifrUfC9ULk6Ktx7GoBjl0twOpvgAA3Ozw4whsPjvDGQE9H858cNcMg1Qt8eeYmhADC/V3M8igVIiJqOxsbWbuemdjoTn8I2irlCA9wRZi/C/LLDNDaKXHsSgF+uFaAG4WV+Nv3V/G3769ikKcj5ob64NFwX3xzLrdNdfIPy/ZjkOoFTJf1RvG23M7GyxtEZCkymcz0KKv7hnhi/AB3XMwpw5msEly+VY5Lt8rw3tcX8advL2GYjxYRQW7w5fxQs2OQ6uHO39TjXHYpFDYyzBrW8QeYEhFR16RWyDHC1xkjfJ1RVVOPczf1OJFWhOySKiRnliA5swTeWg0mDOiDEB8t5Lxj2SwYpHq4DYevAQBmDPPqUQ8x5cgPdQX8OaSuylYlx+gAV4wOcEVWcSV+vF6EM1kluKmvxs5TmYhNvYVJA/pglJ/zXZ8CQa1jkOrBrueXY//ZHADA83ysBBFRr9TXxQ6PhNlhZogOP6YVIf5aAYoqarDndDa+u3gLEwb0wZhAVygZqCRhkOrBNsVdgxDA/UM8MMTL+ksA8K93IiLrsVMrcO9gD4zv746TN4pw9Eo+Sqvr8NXZHBy5nI+JA/vg4VAfrkvVTgxSPdTNkirsTmqYZP78lP5Wbg0RUffXU5ZTUSlsMK6/OyICXZGcUYJDl/NQUlmLr87m4MSNIiya1A/zI/wYqNqIQaoHEkLgzX3nUWcUiAxyQ6ifi7WbRJ2II31EJIVCboPRga4Y5e/cEKgu5SG/zIDff3kBGw9fw6JJQZgf4d+jHnLfGRikeqC9p7MRe+EWlHIZ3nwo2NrNIepSunvw7O7tp65HYWOD0QGuGOXnDJXcBusOXUVWcRX+8FUqNsVdZ6C6CwapHuZWaTXe/Pw8AGDZ/QMxWGf9uVFERNT1KWwaHk3zPxODkJxRgsOX8lBQbsAfvkrF+wevYOLAPhgT4IqnxwVYu6ldCoNUD1JWXYtFHyWitLoOw/tq8dzEIGs3iYiIupnGEapQPxckZxTj0KU8FFfWYv/ZHMRdykN+eTWeiPCHj7OttZvaJTBI9RDlhjo8veUkkjNKoLVVYs2jI7g2SBvxUgkRUXNyGxnCA1wx6meBav2ha9h4+BruHeyJ6Eh/TOjvDptevLinTAghrN2Inqq0tBRarRZ6vR5OTp13ie1qXhliPk3BmSw9nDQKfLLwHoT4aDutvpYwjBAR9Wz1RoHUnFLcKKxA/LVC03ZPJzWmDdVh+lAdxgS69og/4tvz+W31s92wYQMCAwOh0WgQFhaGo0ePtlo+Li4OYWFh0Gg0CAoKwqZNm5qV2bVrF4KDg6FWqxEcHIw9e/a0u14hBN566y14e3vD1tYWkydPxvnz5zt2smamr6zF2u+uYOZfj5lC1EfPRlg8RBERUc8nt5EhxEeLTxbeg4Mxk/CrcQFw1Chwq9SAbQnpeOKfxzH6nYOI+fQ0dp7MwNW8MhiNPX+sxqqX9nbu3Illy5Zhw4YNGDduHP7+979jxowZuHDhAvz8mq/FkZaWhpkzZ2LhwoX46KOP8MMPP+D5559Hnz59MG/ePABAQkICoqKi8Pvf/x5z587Fnj178Oijj+LYsWOIiIhoc71//OMfsWbNGmzduhUDBw7EH/7wBzzwwAO4dOkSHB0dLddJLTiXrce/4m/gizM3UV1rBABMGdQH7z48DF5aXrMmIqLO1d/DAW8+OBQvzxiM+KuF+OZcLg5cyEVxZS12J2Wb1jF00igQ6u+CIV5OCHCzQ4CbPQLc7eHhqO4xD0+26qW9iIgIhIaGYuPGjaZtQ4YMwZw5c7Bq1apm5VesWIF9+/YhNTXVtG3RokVISUlBQkICACAqKgqlpaX4+uuvTWWmT58OFxcXbN++vU31C
iHg7e2NZcuWYcWKFQAAg8EAT09PrF69Gs8991ybzq+zLu394csL+OexNADAYJ0jfjO5Hx4a4W3VH0pe2iMi6h3utOhoXb0RJ9KKcPRqAZLSi5GSVWL6Y//n1AobuNmr4GKvgqu9Ci52KjhoFFArbKBRyqFW2ECtuP1fpQ0UNjLIZDLYyGSQ2wA2ssavG0Kdue9Qb8/nt9VGpGpqapCYmIiXX365yfapU6ciPj6+xX0SEhIwderUJtumTZuGzZs3o7a2FkqlEgkJCVi+fHmzMu+//36b601LS0Nubm6TutRqNSZNmoT4+Pg7BimDwQCDwWD6Wq/XA2j4hpjTQ0NdkJ1XiMfG+GKkrwtkMhnKysrMWkd7VVZYt34iIrKM1j7TQjxUCPHwBsZ6o7beiMu5ZUjJKkFaQQXSiyqRWVSJmyXVqDIIZFUAWWZoz6/HB2L5AwPNcKT/ajzHtow1WS1IFRQUoL6+Hp6enk22e3p6Ijc3t8V9cnNzWyxfV1eHgoICeHl53bFM4zHbUm/jf1sqk56efsdzWrVqFf7v//6v2XZfX9877tMRzWeHERERda6F1m7Az7z1PvBWJx27rKwMWm3r846tvvzBzy9HCSFavUTVUvmfb2/LMc1V5qdeeeUVxMTEmL42Go0oKiqCm5tbj7kW3JLS0lL4+voiMzOzU+9O7MnYhx3HPjQP9mPHsQ87ztp9KIRAWVkZvL2971rWakHK3d0dcrm82ehTXl5es5GgRjqdrsXyCoUCbm5urZZpPGZb6tXpdAAaRqa8vLza1Dag4fKfWq1uss3Z2fmO5XsaJycn/tLoIPZhx7EPzYP92HHsw46zZh/ebSSqkdWWP1CpVAgLC0NsbGyT7bGxsRg7dmyL+0RGRjYrf+DAAYSHh0OpVLZapvGYbak3MDAQOp2uSZmamhrExcXdsW1ERETUCwkr2rFjh1AqlWLz5s3iwoULYtmyZcLe3l7cuHFDCCHEyy+/LKKjo03lr1+/Luzs7MTy5cvFhQsXxObNm4VSqRT/+c9/TGV++OEHIZfLxXvvvSdSU1PFe++9JxQKhfjxxx/bXK8QQrz33ntCq9WK3bt3i7Nnz4rHH39ceHl5idLSUgv0TPei1+sFAKHX663dlG6Lfdhx7EPzYD92HPuw47pTH1o1SAkhxPr164W/v79QqVQiNDRUxMXFmd5bsGCBmDRpUpPyhw8fFqNGjRIqlUoEBASIjRs3NjvmZ599JgYNGiSUSqUYPHiw2LVrV7vqFUIIo9Eo3nzzTaHT6YRarRYTJ04UZ8+eNc9J9zDV1dXizTffFNXV1dZuSrfFPuw49qF5sB87jn3Ycd2pD/mIGCIiIiKJrP6IGCIiIqLuikGKiIiISCIGKSIiIiKJGKSIiIiIJGKQog7ZsGEDAgMDodFoEBYWhqNHj1q7SVaxatUqjB49Go6OjvDw8MCcOXNw6dKlJmWEEHjrrbfg7e0NW1tbTJ48GefPn29SxmAw4IUXXoC7uzvs7e3x0EMPISur6dOoiouLER0dDa1WC61Wi+joaJSUlHT2KVrcqlWrIJPJsGzZMtM29mHbZGdn48knn4Sbmxvs7OwwcuRIJCYmmt5nP7aurq4Or732GgIDA2Fra4ugoCC8/fbbMBr/+wBe9mFzR44cwYMPPghvb2/IZDLs3bu3yfuW7LOMjAw8+OCDsLe3h7u7O5YsWYKamprOOG3rriNF3Vvjelz/+Mc/xIULF8TSpUuFvb29SE9Pt3bTLG7atGliy5Yt4ty5c+L06dNi1qxZws/PT5SXl5vKvPfee8LR0VHs2rVLnD17VkRFRTVbm2zRokXCx8dHxMbGiqSkJDFlyhQxYsQIUVdXZyozffp0ERISIuLj40V8fLwICQkRv/jFLyx6vp3txIkTIiAgQAwfPlwsXbrUtJ19eHdFRUXC399fPP300+L48eMiLS1NHDx4UFy9etVUhv3Yuj/84Q/Czc1NfPnllyItLU189tlnwsHBQbz//
vumMuzD5vbv3y9effVVsWvXLgFA7Nmzp8n7luqzuro6ERISIqZMmSKSkpJEbGys8Pb2FosXL+6U82aQIsnGjBkjFi1a1GTb4MGDxcsvv2ylFnUdeXl5AoBpfTKj0Sh0Op147733TGWqq6uFVqsVmzZtEkIIUVJSIpRKpdixY4epTHZ2trCxsRHffPONEEKICxcuCABNFphNSEgQAMTFixctcWqdrqysTAwYMEDExsaKSZMmmYIU+7BtVqxYIcaPH3/H99mPdzdr1izxzDPPNNn28MMPiyeffFIIwT5si58HKUv22f79+4WNjY3Izs42ldm+fbtQq9WdssAnL+2RJDU1NUhMTMTUqVObbJ86dSri4+Ot1KquQ6/XAwBcXV0BAGlpacjNzW3SX2q1GpMmTTL1V2JiImpra5uU8fb2RkhIiKlMQkICtFotIiIiTGXuueceaLXaHtPvv/3tbzFr1izcf//9TbazD9tm3759CA8Pxy9/+Ut4eHhg1KhR+Mc//mF6n/14d+PHj8d3332Hy5cvAwBSUlJw7NgxzJw5EwD7UApL9llCQgJCQkKaPHB42rRpMBgMTS5xm4vVHlpM3VtBQQHq6+ubPcTZ09Oz2QOhexshBGJiYjB+/HiEhIQAgKlPWuqv9PR0UxmVSgUXF5dmZRr3z83NhYeHR7M6PTw8ekS/79ixA0lJSTh58mSz99iHbXP9+nVs3LgRMTExWLlyJU6cOIElS5ZArVbjqaeeYj+2wYoVK6DX6zF48GDI5XLU19fjnXfeweOPPw6AP4tSWLLPcnNzm9Xj4uIClUrVKf3KIEUdIpPJmnwthGi2rbdZvHgxzpw5g2PHjjV7T0p//bxMS+V7Qr9nZmZi6dKlOHDgADQazR3LsQ9bZzQaER4ejnfffRcAMGrUKJw/fx4bN27EU089ZSrHfryznTt34qOPPsInn3yCoUOH4vTp01i2bBm8vb2xYMECUzn2YftZqs8s2a+8tEeSuLu7Qy6XN0v3eXl5zf4S6E1eeOEF7Nu3D4cOHULfvn1N23U6HQC02l86nQ41NTUoLi5utcytW7ea1Zufn9/t+z0xMRF5eXkICwuDQqGAQqFAXFwc1q5dC4VCYTo/9mHrvLy8EBwc3GTbkCFDkJGRAYA/i23x0ksv4eWXX8Zjjz2GYcOGITo6GsuXL8eqVasAsA+lsGSf6XS6ZvUUFxejtra2U/qVQYokUalUCAsLQ2xsbJPtsbGxGDt2rJVaZT1CCCxevBi7d+/G999/j8DAwCbvBwYGQqfTNemvmpoaxMXFmforLCwMSqWySZmcnBycO3fOVCYyMhJ6vR4nTpwwlTl+/Dj0en237/f77rsPZ8+exenTp02v8PBwzJ8/H6dPn0ZQUBD7sA3GjRvXbOmNy5cvw9/fHwB/FtuisrISNjZNPx7lcrlp+QP2YftZss8iIyNx7tw55OTkmMocOHAAarUaYWFh5j85s09fp16jcfmDzZs3iwsXLohly5YJe3t7cePGDWs3zeJ+85vfCK1WKw4fPixycnJMr8rKSlOZ9957T2i1WrF7925x9uxZ8fjjj7d462/fvn3FwYMHRVJSkrj33ntbvPV3+PDhIiEhQSQkJIhhw4Z129ul7+and+0JwT5sixMnTgiFQiHeeecdceXKFfHxxx8LOzs78dFHH5nKsB9bt2DBAuHj42Na/mD37t3C3d1d/O///q+pDPuwubKyMpGcnCySk5MFALFmzRqRnJxsWhLHUn3WuPzBfffdJ5KSksTBgwdF3759ufwBdU3r168X/v7+QqVSidDQUNPt/r0NgBZfW7ZsMZUxGo3izTffFDqdTqjVajFx4kRx9uzZJsepqqoSixcvFq6ursLW1lb84he/EBkZGU3KFBYWivnz5wtHR0fh6Ogo5s+fL4qLiy1wlpb38yDFPmybL774QoSEhAi1Wi0GDx4sPvjggybvsx9bV1paKpYuXSr8/PyERqMRQUFB4tVXXxUGg8FUh
n3Y3KFDh1r8PbhgwQIhhGX7LD09XcyaNUvY2toKV1dXsXjxYlFdXd0p5y0TQgjzj3MRERER9XycI0VEREQkEYMUERERkUQMUkREREQSMUgRERERScQgRURERCQRgxQRERGRRAxSRERERBIxSBERERFJxCBFREREJBGDFBH1ek8//TRkMhkWLVrU7L3nn38eMpkMTz/9NICGJ9E/99xz8PPzg1qthk6nw7Rp05CQkNBs3/j4eMjlckyfPr2zT4GIrIRBiogIgK+vL3bs2IGqqirTturqamzfvh1+fn6mbfPmzUNKSgr+9a9/4fLly9i3bx8mT56MoqKiZsf88MMP8cILL+DYsWPIyMiwyHkQkWUprN0AIqKuIDQ0FNevX8fu3bsxf/58AMDu3bvh6+uLoKAgAEBJSQmOHTuGw4cPY9KkSQAAf39/jBkzptnxKioq8Omnn+LkyZPIzc3F1q1b8cYbb1juhIjIIjgiRUR0269+9Sts2bLF9PWHH36IZ555xvS1g4MDHBwcsHfvXhgMhlaPtXPnTgwaNAiDBg3Ck08+iS1btoDPiCfqeRikiIhui46OxrFjx3Djxg2kp6fjhx9+wJNPPml6X6FQYOvWrfjXv/4FZ2dnjBs3DitXrsSZM2eaHWvz5s2mfadPn47y8nJ89913FjsXIrIMBikiotvc3d0xa9Ys/Otf/8KWLVswa9YsuLu7Nykzb9483Lx5E/v27cO0adNw+PBhhIaGYuvWraYyly5dwokTJ/DYY48BaAhgUVFR+PDDDy15OkRkAZwjRUT0E8888wwWL14MAFi/fn2LZTQaDR544AE88MADeOONN/Dss8/izTffNN3Zt3nzZtTV1cHHx8e0jxACSqUSxcXFcHFx6fTzICLL4IgUEdFPTJ8+HTU1NaipqcG0adPatE9wcDAqKioAAHV1ddi2bRv+/Oc/4/Tp06ZXSkoK/P398fHHH3dm84nIwjgiRUT0E3K5HKmpqab//6nCwkL88pe/xDPPPIPhw4fD0dERp06dwh//+EfMnj0bAPDll1+iuLgYv/71r6HVapvs/8gjj2Dz5s2mES8i6v4YpIiIfsbJyanF7Q4ODoiIiMBf/vIXXLt2DbW1tfD19cXChQuxcuVKAA2X9e6///5mIQpomF/17rvvIikpCaGhoZ16DkRkGTLB+3GJiIiIJOEcKSIiIiKJGKSIiIiIJGKQIiIiIpKIQYqIiIhIIgYpIiIiIokYpIiIiIgkYpAiIiIikohBioiIiEgiBikiIiIiiRikiIiIiCRikCIiIiKS6P8DxX45r7QwCfoAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(data['MSA'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 280, + "id": "cf701682", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAGdCAYAAADjWSL8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdeElEQVR4nO3dbWzd9X338Y9zZzvMOYNktms1Q6kUMbqwazR0ISldkICMrFlUKi20oe6mMmgFhXmBQTN2A0hNSrYGtEblbtNABRqeLFo1kYzsRlFRAokC2cptHwxBgJjQyjkOi+OE5H894OJcc9KxONwc/8zrJR3J53++x/79Yznnrd859mmpqqoKAEBhJjR7AQAAJ0PEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUKRJzV7AB+Xo0aN57bXX0tHRkZaWlmYvBwA4AVVVZf/+/enp6cmECe++1zJuI+a1117LzJkzm70MAOAk7N69Ox//+MffdWbcRkxHR0eSt/8Rpk2b1uTVAAAnYnBwMDNnzmw8jr+bcRsx7zyFNG3aNBEDAIU5kZeCeGEvAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUat++dBO+nqqpy8ODBZi+DvP29GB4eTpK0trae0Pur8MFra2vzveBDJ2LgBBw8eDCLFy9u9jJgzNq4cWPa29ubvQw+YjydBAAUyU4MnIC2trZs3Lix2csgb++KXXLJJUmSDRs2pK2trckrIonvA00hYuAEtLS02Cofg9ra2nxf4CPM00kAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUKRRRcxbb72VP/3TP82sWbPS3t6eT3ziE7n11ltz9OjRxkxVVbn55pvT09OT9vb2nH/++XnmmWdGfJ7h4eFcc801mTFjRk455ZQsXbo0r7zyyoiZgYGB9Pb2plarpVarpbe3N/v27Tv5MwUAxpVRRcxtt92Wu+66K+vWrctzzz2XNWvW5C//8i/z3e9+tzGzZs2arF27NuvWrcuOHTvS3d2diy66KPv372/M9PX1ZcOGDVm/fn0ee+yxvPnmm1myZEmOHDnSmFm+fHl27dqVTZs2ZdOmTdm1a1d6e3vfh1MGAMaFahQ+97nPVV/96ldHHPvCF75QffnLX66qqqqOHj1adXd3V9/+9rcbtx88eLCq1WrVXXfdVVVVVe3bt6+aPHlytX79+sbMq6++Wk2YMKHatGlTVVVV9eyzz1ZJqscff7wxs23btipJ9fzzz5/QWuv1epWkqtfrozlFYIw7cOBAtXDhwmrhwoXVgQMHmr0c4H02msfvUe3EnHfeefmXf/mX/OQnP0mS/Pu//3see+yx/PZv/3aS5MUXX0x/f38WLVrUuE9ra2sWLlyYrVu3Jkl27tyZw4cPj5jp6enJnDlzGjPbtm1LrVbLvHnzGjPnnntuarVaY+ZYw8PDGRwcHHEBA
MavSaMZvvHGG1Ov1/Mrv/IrmThxYo4cOZJvfetb+dKXvpQk6e/vT5J0dXWNuF9XV1deeumlxsyUKVNy6qmnHjfzzv37+/vT2dl53Nfv7OxszBxr9erVueWWW0ZzOgBAwUa1E/Pwww/ngQceyEMPPZQnn3wy999/f/7qr/4q999//4i5lpaWEderqjru2LGOnfl58+/2eVauXJl6vd647N69+0RPCwAo0Kh2Yv74j/843/zmN/PFL34xSXLWWWflpZdeyurVq/N7v/d76e7uTvL2TsrHPvaxxv327t3b2J3p7u7OoUOHMjAwMGI3Zu/evVmwYEFj5vXXXz/u67/xxhvH7fK8o7W1Na2traM5HQCgYKPaiTlw4EAmTBh5l4kTJzZ+xXrWrFnp7u7O5s2bG7cfOnQoW7ZsaQTK3LlzM3ny5BEze/bsydNPP92YmT9/fur1erZv396YeeKJJ1Kv1xszAMBH26h2Yn7nd34n3/rWt/LLv/zL+dVf/dU89dRTWbt2bb761a8mefspoL6+vqxatSqzZ8/O7Nmzs2rVqkydOjXLly9PktRqtVx++eW57rrrMn369Jx22mm5/vrrc9ZZZ+XCCy9Mkpx55pm5+OKLc8UVV+Tuu+9Oklx55ZVZsmRJzjjjjPfz/AGAQo0qYr773e/mz/7sz3LVVVdl79696enpyde+9rX8+Z//eWPmhhtuyNDQUK666qoMDAxk3rx5efTRR9PR0dGYuf322zNp0qQsW7YsQ0NDueCCC3Lfffdl4sSJjZkHH3ww1157beO3mJYuXZp169a91/MFAMaJlqqqqmYv4oMwODiYWq2Wer2eadOmNXs5wPtkaGgoixcvTpJs3Lgx7e3tTV4R8H4azeO3904CAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIo06oh59dVX8+UvfznTp0/P1KlT8+u//uvZuXNn4/aqqnLzzTenp6cn7e3tOf/88/PMM8+M+BzDw8O55pprMmPGjJxyyilZunRpXnnllREzAwMD6e3tTa1WS61WS29vb/bt23dyZwkAjDujipiBgYF85jOfyeTJk7Nx48Y8++yz+c53vpNf/MVfbMysWbMma9euzbp167Jjx450d3fnoosuyv79+xszfX192bBhQ9avX5/HHnssb775ZpYsWZIjR440ZpYvX55du3Zl06ZN2bRpU3bt2pXe3t73fsYAwPhQjcKNN95YnXfeef/j7UePHq26u7urb3/7241jBw8erGq1WnXXXXdVVVVV+/btqyZPnlytX7++MfPqq69WEyZMqDZt2lRVVVU9++yzVZLq8ccfb8xs27atSlI9//zzJ7TWer1eJanq9fpoThEY4w4cOFAtXLiwWrhwYXXgwIFmLwd4n43m8XtUOzE//OEPc8455+R3f/d309nZmbPPPjv33ntv4/YXX3wx/f39WbRoUeNYa2trFi5cmK1btyZJdu7cmcOHD4+Y6enpyZw5cxoz27ZtS61Wy7x58xoz5557bmq1WmPmWMPDwxkcHBxxAQDGr0mjGf7P//zP3HnnnVmxYkX+5E/+JNu3b8+1116b1tbWfOUrX0l/f3+SpKura8T9urq68tJLLyVJ+vv7M2XKlJx66qnHzbxz//7+/nR2dh739Ts7Oxszx
1q9enVuueWW0ZzOmFdVVQ4ePNjsZcCY8t9/Jvx8wM/X1taWlpaWZi/jAzeqiDl69GjOOeecrFq1Kkly9tln55lnnsmdd96Zr3zlK425Y//hqqr6X/8xj535efPv9nlWrlyZFStWNK4PDg5m5syZ//tJjWEHDx7M4sWLm70MGLMuueSSZi8BxqSNGzemvb292cv4wI3q6aSPfexj+eQnPzni2JlnnpmXX345SdLd3Z0kx+2W7N27t7E7093dnUOHDmVgYOBdZ15//fXjvv4bb7xx3C7PO1pbWzNt2rQRFwBg/BrVTsxnPvOZvPDCCyOO/eQnP8npp5+eJJk1a1a6u7uzefPmnH322UmSQ4cOZcuWLbntttuSJHPnzs3kyZOzefPmLFu2LEmyZ8+ePP3001mzZk2SZP78+anX69m+fXt+4zd+I0nyxBNPpF6vZ8GCBe/hdMv15q9/KdWEUX27YHyqquToW29/PGFS8hHYMocT0XL0rfzCrh80exkfqlE9Kv7RH/1RFixYkFWrVmXZsmXZvn177rnnntxzzz1J3n4KqK+vL6tWrcrs2bMze/bsrFq1KlOnTs3y5cuTJLVaLZdffnmuu+66TJ8+Paeddlquv/76nHXWWbnwwguTvL27c/HFF+eKK67I3XffnSS58sors2TJkpxxxhnv5/kXo5owKZk4udnLgDFiSrMXAGNO1ewFNMGoIubTn/50NmzYkJUrV+bWW2/NrFmzcscdd+Syyy5rzNxwww0ZGhrKVVddlYGBgcybNy+PPvpoOjo6GjO33357Jk2alGXLlmVoaCgXXHBB7rvvvkycOLEx8+CDD+baa69t/BbT0qVLs27duvd6vgDAONFSVdW4jLfBwcHUarXU6/ViXx8zNDTUeGHv/k/12okB4H925HA6nvx+krJf2Duax2/vnQQAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRIxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFOk9Rczq1avT0tKSvr6+xrGqqnLzzTenp6cn7e3tOf/88/PMM8+MuN/w8HCuueaazJgxI6ecckqWLl2aV155ZcTMwMBAent7U6vVUqvV0tvbm3379r2X5QIA48hJR8yOHTtyzz335Nd+7ddGHF+zZk3Wrl2bdevWZceOHenu7s5FF12U/fv3N2b6+vqyYcOGrF+/Po899ljefPPNLFmyJEeOHGnMLF++PLt27cqmTZuyadOm7Nq1K729vSe7XABgnDmpiHnzzTdz2WWX5d57782pp57aOF5VVe64447cdNNN+cIXvpA5c+bk/vvvz4EDB/LQQw8lSer1ev72b/823/nOd3LhhRfm7LPPzgMPPJAf//jH+ed//uckyXPPPZdNmzblb/7mbzJ//vzMnz8/9957b/7xH/8xL7zwwvtw2gBA6U4qYq6++up87nOfy4UXXjji+Isvvpj+/v4sWrSocay1tTULFy7M1q1bkyQ7d+7M4cOHR8z09PRkzpw5jZlt27alVqtl3rx5jZlzzz03tVqtMXOs4eHhDA4OjrgAAOPXpNHeYf369XnyySezY8eO427r7+9PknR1dY043tXVlZdeeqkxM2XKlBE7OO/MvHP//v7+dHZ2Hvf5Ozs7GzPHWr16dW655ZbRng4AUKhR7cTs3r07f/iHf5gHHnggbW1t/+NcS
0vLiOtVVR137FjHzvy8+Xf7PCtXrky9Xm9cdu/e/a5fDwAo26giZufOndm7d2/mzp2bSZMmZdKkSdmyZUv++q//OpMmTWrswBy7W7J3797Gbd3d3Tl06FAGBgbedeb1118/7uu/8cYbx+3yvKO1tTXTpk0bcQEAxq9RRcwFF1yQH//4x9m1a1fjcs455+Syyy7Lrl278olPfCLd3d3ZvHlz4z6HDh3Kli1bsmDBgiTJ3LlzM3ny5BEze/bsydNPP92YmT9/fur1erZv396YeeKJJ1Kv1xszAMBH26heE9PR0ZE5c+aMOHbKKadk+vTpjeN9fX1ZtWpVZs+endmzZ2fVqlWZOnVqli9fniSp1Wq5/PLLc91112X69Ok57bTTcv311+ess85qvFD4zDPPzMUXX5wrrrgid999d5LkyiuvzJIlS3LGGWe855MGAMo36hf2/m9uuOGGDA0N5aqrrsrAwEDmzZuXRx99NB0dHY2Z22+/PZMmTcqyZcsyNDSUCy64IPfdd18mTpzYmHnwwQdz7bXXNn6LaenSpVm3bt37vVwAoFAtVVVVzV7EB2FwcDC1Wi31er3Y18cMDQ1l8eLFSZL9n+pNJk5u8ooAGLOOHE7Hk99PkmzcuDHt7e1NXtDJGc3jt/dOAgCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCJNavYC+J9VVfX/rxw53LyFADD2/bfHiRGPH+OYiBnDhoeHGx93/Pv6Jq4EgJIMDw9n6tSpzV7GB87TSQBAkezEjGGtra2Nj/f/ny8mEyc3cTUAjGlHDjd27f/748d4JmLGsJaWlv9/ZeJkEQPACRnx+DGOeToJACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIo0qohZvXp1Pv3pT6ejoyOdnZ35/Oc/nxdeeGHETFVVufnmm9PT05P29vacf/75eeaZZ0bMDA8P55prrsmMGTNyyimnZOnSpXnllVdGzAwMDKS3tze1Wi21Wi29vb3Zt2/fyZ0lADDujCpitmzZkquvvjqPP/54Nm/enLfeeiuLFi3Kf/3XfzVm1qxZk7Vr12bdunXZsWNHuru7c9FFF2X//v2Nmb6+vmzYsCHr16/PY489ljfffDNLlizJkSNHGjPLly/Prl27smnTpmzatCm7du1Kb2/v+3DKAMB40FJVVXWyd37jjTfS2dmZLVu25Dd/8zdTVVV6enrS19eXG2+8Mcnbuy5dXV257bbb8rWvfS31ej2/9Eu/lO9///u59NJLkySvvfZaZs6cmUceeSS/9Vu/leeeey6f/OQn8/jjj2fevHlJkscffzzz58/P888/nzPOOON/Xdvg4GBqtVrq9XqmTZt2sqfYVENDQ1m8eHGSZP+nepOJk5u8IgDGrCOH0/Hk95MkGzduTHt7e5MXdHJG8/j9nl4TU6/XkySnnXZakuTFF19Mf39/Fi1a1JhpbW3NwoULs3Xr1iTJzp07c/jw4REzPT09mTNnTmNm27ZtqdVqjYBJknPPPTe1Wq0xc6zh4eEMDg6OuAAA49dJR0xVVVmxYkXOO++8zJkzJ0nS39+fJOnq6hox29XV1bitv78/U6ZMyamnnvquM52dncd9zc7OzsbMsVavXt14/UytVsvMmTNP9tQAgAKcdMR84xvfyH/8x3/kBz/4wXG3tbS0jLheVdVxx4517MzPm3+3z7Ny5crU6/XGZffu3SdyGgBAoU4qY
q655pr88Ic/zL/927/l4x//eON4d3d3khy3W7J3797G7kx3d3cOHTqUgYGBd515/fXXj/u6b7zxxnG7PO9obW3NtGnTRlwAgPFrVBFTVVW+8Y1v5O///u/zr//6r5k1a9aI22fNmpXu7u5s3ry5cezQoUPZsmVLFixYkCSZO3duJk+ePGJmz549efrppxsz8+fPT71ez/bt2xszTzzxROr1emMGAPhomzSa4auvvjoPPfRQ/uEf/iEdHR2NHZdarZb29va0tLSkr68vq1atyuzZszN79uysWrUqU6dOzfLlyxuzl19+ea677rpMnz49p512Wq6//vqcddZZufDCC5MkZ555Zi6++OJcccUVufvuu5MkV155ZZYsWXJCv5kEAIx/o4qYO++8M0ly/vnnjzj+d3/3d/n93//9JMkNN9yQoaGhXHXVVRkYGMi8efPy6KOPpqOjozF/++23Z9KkSVm2bFmGhoZywQUX5L777svEiRMbMw8++GCuvfbaxm8xLV26NOvWrTuZcwQAxqH39HdixjJ/JwaAjxR/JwYAoAwiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCKJGACgSCIGACiSiAEAiiRiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKJGIAgCJNavYCODEtR99K1exFwFhQVcnRt97+eMKkpKWlueuBMaLlnZ+LjxARU4hf2PWDZi8BAMYUTycBAEWyEzOGtbW1ZePGjc1eBowpBw8ezCWXXJIk2bBhQ9ra2pq8Ihh7Pio/FyJmDGtpaUl7e3uzlwFjVltbm58R+AjzdBIAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARRrzEfO9730vs2bNSltbW+bOnZsf/ehHzV4SADAGjOmIefjhh9PX15ebbropTz31VD772c9m8eLFefnll5u9NACgycb0u1ivXbs2l19+ef7gD/4gSXLHHXfkn/7pn3LnnXdm9erVTV4dHyVVVeXgwYPNXgbJiO+D78nY0dbWlpaWlmYvg4+YMRsxhw4dys6dO/PNb35zxPFFixZl69atx80PDw9neHi4cX1wcPADXyMfHQcPHszixYubvQyOcckllzR7Cfw/GzduTHt7e7OXwUfMmH066ac//WmOHDmSrq6uEce7urrS399/3Pzq1atTq9Ual5kzZ35YSwUAmmDM7sS849jtyaqqfu6W5cqVK7NixYrG9cHBQSHD+6atrS0bN25s9jLI2/8HvLPr2tra6imMMaKtra3ZS+AjaMxGzIwZMzJx4sTjdl327t173O5M8vZ/Zq2trR/W8viIaWlpsVU+hkydOrXZSwDGgDH7dNKUKVMyd+7cbN68ecTxzZs3Z8GCBU1aFQAwVozZnZgkWbFiRXp7e3POOedk/vz5ueeee/Lyyy/n61//erOXBgA02ZiOmEsvvTQ/+9nPcuutt2bPnj2ZM2dOHnnkkZx++unNXhoA0GQtVVVVzV7EB2FwcDC1Wi31ej3Tpk1r9nIAgBMwmsfvMfuaGACAdyNiAIAiiRgAoEgiBgAokogBAIokYgCAIokYAKBIIgYAKJKIAQCKNKbfduC9eOcPEQ8ODjZ5JQDAiXrncftE3lBg3EbM/v37kyQzZ85s8koAgNHav39/arXau86M2/dOOnr0aF577bV0dHSkpaWl2csB3keDg4OZOXNmdu/e7b3RYJypqir79+9PT09PJkx491e9jNuIAcYvb/AKJF7YCwAUSsQAAEUSMUBxWltb8xd/8RdpbW1t9lKAJvKaGACgSHZiAIAiiRgAo
EgiBgAokogBAIokYoDifO9738usWbPS1taWuXPn5kc/+lGzlwQ0gYgBivLwww+nr68vN910U5566ql89rOfzeLFi/Pyyy83e2nAh8yvWANFmTdvXj71qU/lzjvvbBw788wz8/nPfz6rV69u4sqAD5udGKAYhw4dys6dO7No0aIRxxctWpStW7c2aVVAs4gYoBg//elPc+TIkXR1dY043tXVlf7+/iatCmgWEQMUp6WlZcT1qqqOOwaMfyIGKMaMGTMyceLE43Zd9u7de9zuDDD+iRigGFOmTMncuXOzefPmEcc3b96cBQsWNGlVQLNMavYCAEZjxYoV6e3tzTnnnJP58+fnnnvuycsvv5yvf/3rzV4a8CETMUBRLr300vzsZz/Lrbfemj179mTOnDl55JFHcvrppzd7acCHzN+JAQCK5DUxAECRRAwAUCQRAwAUScQAAEUSMQBAkUQMAFAkEQMAFEnEAABFEjEAQJFEDABQJBEDABRJxAAARfq/OaJyG730380AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(data['MSA'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "id": "71386412", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 95280.000000\n", + "mean 3527.744102\n", + "std 2863.904737\n", + "min 0.000000\n", + "25% 520.000000\n", + "50% 3350.000000\n", + "75% 5960.000000\n", + "max 9360.000000\n", + "Name: MSA, dtype: float64" + ] + }, + "execution_count": 281, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['MSA'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 285, + "id": "3445cb8b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "min: 0.0, Q1: 520.0, Q2: 3350.0, Q3: 5960.0, max: 9360.0\n" + ] + } + ], + "source": [ + "# Calculate quantiles and IQR for the MSA column\n", + "min_val_msa = data['MSA'].min()\n", + "Q1_msa = data['MSA'].quantile(0.25)\n", + "Q2_msa = data['MSA'].median()\n", + "Q3_msa = data['MSA'].quantile(0.75)\n", + "IQR_msa = Q3_msa - Q1_msa\n", + "max_val_msa = data['MSA'].max()\n", + "lower_bound_msa = Q1_msa - 1.5 * IQR_msa\n", + "upper_bound_msa = Q3_msa + 1.5 * IQR_msa\n", + "\n", + "print(f\"min: {min_val_msa}, Q1: {Q1_msa}, Q2: {Q2_msa}, Q3: {Q3_msa}, max: {max_val_msa}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 287, + "id": "1b69528e", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Counts in each group:\n", + "MSA_bin\n", + "Lower Bound 24487\n", + "Q1 23188\n", + "Q3 23959\n", + "Upper Bound 23646\n", + "dtype: int64\n", + "\n" + ] + } + ], + "source": [ + "# Define bins and labels\n", + "bins_msa = [min_val_msa, Q1_msa, Q2_msa, Q3_msa, max_val_msa] \n", + "labels_msa = ['Lower Bound', 'Q1', 'Q3', 'Upper Bound'] \n", + "\n", + "# Create a new column with 
the binned ranges\n", + "data['MSA_bin'] = pd.cut(data['MSA'], bins=bins_msa, labels=labels_msa, include_lowest=True)\n", + "\n", + "# Group by the new 'MSA_bin' column\n", + "grouped_msa = data.groupby('MSA_bin')\n", + "\n", + "# Get counts for each group\n", + "print(\"Counts in each group:\")\n", + "print(grouped_msa.size())\n", + "print()" + ] + }, + { + "cell_type": "code", + "execution_count": 288, + "id": "67721709", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 2\n", + "2 0\n", + "3 3\n", + "4 2\n", + " ..\n", + "95407 0\n", + "95408 2\n", + "95409 2\n", + "95410 3\n", + "95411 3\n", + "Name: MSA, Length: 95280, dtype: category\n", + "Categories (4, int64): [0 < 1 < 2 < 3]" + ] + }, + "execution_count": 288, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Define the ordinal encoding for each variable\n", + "msa_mapping = {'Lower Bound':0, 'Q1':1, 'Q3':2, 'Upper Bound':3}\n", + "\n", + "# Apply ordinal encoding with the custom mappings\n", + "data['MSA'] = data['MSA_bin'].map(msa_mapping)\n", + "\n", + "# Drop the 'MSA_bin' column if no longer needed\n", + "data.drop('MSA_bin', axis=1, inplace=True)\n", + "data['MSA']" + ] + }, + { + "cell_type": "markdown", + "id": "b91220a1", + "metadata": {}, + "source": [ + "#### Columns cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 289, + "id": "1264908b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GENDERGEOCODE2WEALTH1ADIDMAMSA
0FC5.0330
1MA9.0042
2MC1.0410
3FC4.0143
4FA2.0112
.....................
95407MC4.0030
95408MA9.0322
95409MB4.0012
95410FA8.0043
95411FC8.0413
\n", + "

95280 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " GENDER GEOCODE2 WEALTH1 ADI DMA MSA\n", + "0 F C 5.0 3 3 0\n", + "1 M A 9.0 0 4 2\n", + "2 M C 1.0 4 1 0\n", + "3 F C 4.0 1 4 3\n", + "4 F A 2.0 1 1 2\n", + "... ... ... ... .. .. ..\n", + "95407 M C 4.0 0 3 0\n", + "95408 M A 9.0 3 2 2\n", + "95409 M B 4.0 0 1 2\n", + "95410 F A 8.0 0 4 3\n", + "95411 F C 8.0 4 1 3\n", + "\n", + "[95280 rows x 6 columns]" + ] + }, + "execution_count": 289, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[['GENDER','GEOCODE2','WEALTH1','ADI','DMA','MSA']]" + ] + }, + { + "cell_type": "code", + "execution_count": 290, + "id": "cb32e020", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(95280, 454)" + ] + }, + "execution_count": 290, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 291, + "id": "bf0c7685", + "metadata": {}, + "outputs": [], + "source": [ + "#data.to_csv('learningSet.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bce6d1b8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}