In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.ensemble import IsolationForest
import numpy as np

In [4]:
!pip install ppscore



In [5]:
import ppscore as pps

In [6]:
data = pd.read_csv("adult_with_headers.csv")

In [7]:
data

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
32557,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
32558,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [8]:
# Get basic information about the dataset
data_info = data.info()
summary_statistics = data.describe(include='all')
missing_values = data.isnull().sum()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32561 entries, 0 to 32560
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             32561 non-null  int64 
 1   workclass       32561 non-null  object
 2   fnlwgt          32561 non-null  int64 
 3   education       32561 non-null  object
 4   education_num   32561 non-null  int64 
 5   marital_status  32561 non-null  object
 6   occupation      32561 non-null  object
 7   relationship    32561 non-null  object
 8   race            32561 non-null  object
 9   sex             32561 non-null  object
 10  capital_gain    32561 non-null  int64 
 11  capital_loss    32561 non-null  int64 
 12  hours_per_week  32561 non-null  int64 
 13  native_country  32561 non-null  object
 14  income          32561 non-null  object
dtypes: int64(6), object(9)
memory usage: 2.6+ MB


In [9]:
data_info, summary_statistics, missing_values

(None,
                  age workclass        fnlwgt education  education_num  \
 count   32561.000000     32561  3.256100e+04     32561   32561.000000   
 unique           NaN         9           NaN        16            NaN   
 top              NaN   Private           NaN   HS-grad            NaN   
 freq             NaN     22696           NaN     10501            NaN   
 mean       38.581647       NaN  1.897784e+05       NaN      10.080679   
 std        13.640433       NaN  1.055500e+05       NaN       2.572720   
 min        17.000000       NaN  1.228500e+04       NaN       1.000000   
 25%        28.000000       NaN  1.178270e+05       NaN       9.000000   
 50%        37.000000       NaN  1.783560e+05       NaN      10.000000   
 75%        48.000000       NaN  2.370510e+05       NaN      12.000000   
 max        90.000000       NaN  1.484705e+06       NaN      16.000000   
 
              marital_status       occupation relationship    race    sex  \
 count                 325

In [10]:
# Separate numerical and categorical features
numerical_features = ['age', 'fnlwgt', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week']
categorical_features = ['workclass', 'education', 'marital_status', 'occupation', 'relationship', 'race', 'sex', 'native_country']

In [11]:
# Task 1: Apply Scaling Techniques
# Standard Scaler
standard_scaler = StandardScaler()
data_standard_scaled = data.copy()
data_standard_scaled[numerical_features] = standard_scaler.fit_transform(data[numerical_features])
data_standard_scaled

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income
0,0.030671,State-gov,-1.063611,Bachelors,1.134739,Never-married,Adm-clerical,Not-in-family,White,Male,0.148453,-0.21666,-0.035429,United-States,<=50K
1,0.837109,Self-emp-not-inc,-1.008707,Bachelors,1.134739,Married-civ-spouse,Exec-managerial,Husband,White,Male,-0.145920,-0.21666,-2.222153,United-States,<=50K
2,-0.042642,Private,0.245079,HS-grad,-0.420060,Divorced,Handlers-cleaners,Not-in-family,White,Male,-0.145920,-0.21666,-0.035429,United-States,<=50K
3,1.057047,Private,0.425801,11th,-1.197459,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,-0.145920,-0.21666,-0.035429,United-States,<=50K
4,-0.775768,Private,1.408176,Bachelors,1.134739,Married-civ-spouse,Prof-specialty,Wife,Black,Female,-0.145920,-0.21666,-0.035429,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,-0.849080,Private,0.639741,Assoc-acdm,0.746039,Married-civ-spouse,Tech-support,Wife,White,Female,-0.145920,-0.21666,-0.197409,United-States,<=50K
32557,0.103983,Private,-0.335433,HS-grad,-0.420060,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,-0.145920,-0.21666,-0.035429,United-States,>50K
32558,1.423610,Private,-0.358777,HS-grad,-0.420060,Widowed,Adm-clerical,Unmarried,White,Female,-0.145920,-0.21666,-0.035429,United-States,<=50K
32559,-1.215643,Private,0.110960,HS-grad,-0.420060,Never-married,Adm-clerical,Own-child,White,Male,-0.145920,-0.21666,-1.655225,United-States,<=50K


In [12]:
# Min-Max Scaler
min_max_scaler = MinMaxScaler()
data_min_max_scaled = data.copy()
data_min_max_scaled[numerical_features] = min_max_scaler.fit_transform(data[numerical_features])
data_min_max_scaled

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income
0,0.301370,State-gov,0.044302,Bachelors,0.800000,Never-married,Adm-clerical,Not-in-family,White,Male,0.021740,0.0,0.397959,United-States,<=50K
1,0.452055,Self-emp-not-inc,0.048238,Bachelors,0.800000,Married-civ-spouse,Exec-managerial,Husband,White,Male,0.000000,0.0,0.122449,United-States,<=50K
2,0.287671,Private,0.138113,HS-grad,0.533333,Divorced,Handlers-cleaners,Not-in-family,White,Male,0.000000,0.0,0.397959,United-States,<=50K
3,0.493151,Private,0.151068,11th,0.400000,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0.000000,0.0,0.397959,United-States,<=50K
4,0.150685,Private,0.221488,Bachelors,0.800000,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0.000000,0.0,0.397959,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,0.136986,Private,0.166404,Assoc-acdm,0.733333,Married-civ-spouse,Tech-support,Wife,White,Female,0.000000,0.0,0.377551,United-States,<=50K
32557,0.315068,Private,0.096500,HS-grad,0.533333,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0.000000,0.0,0.397959,United-States,>50K
32558,0.561644,Private,0.094827,HS-grad,0.533333,Widowed,Adm-clerical,Unmarried,White,Female,0.000000,0.0,0.397959,United-States,<=50K
32559,0.068493,Private,0.128499,HS-grad,0.533333,Never-married,Adm-clerical,Own-child,White,Male,0.000000,0.0,0.193878,United-States,<=50K


In [13]:
# Task 2: Apply Encoding Techniques
# One-Hot Encoding for variables with less than 5 categories
categorical_to_onehot = ['sex', 'income']  # income as the target
data_encoded = pd.get_dummies(data, columns=categorical_to_onehot, drop_first=True)
data_encoded

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,capital_gain,capital_loss,hours_per_week,native_country,sex_ Male,income_ >50K
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,2174,0,40,United-States,1,0
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,0,0,13,United-States,1,0
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,0,0,40,United-States,1,0
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,0,0,40,United-States,1,0
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,0,0,40,Cuba,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,0,0,38,United-States,0,0
32557,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,0,0,40,United-States,1,1
32558,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,0,0,40,United-States,0,0
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,0,0,20,United-States,1,0


In [14]:
# Label Encoding for variables with more than 5 categories
label_encoder = LabelEncoder()
categorical_to_label = ['workclass', 'education', 'marital_status', 'occupation', 'relationship', 'race', 'native_country']
for col in categorical_to_label:
    data_encoded[col] = label_encoder.fit_transform(data_encoded[col])
label_encoder

LabelEncoder()

In [16]:
# Task 3: Feature Engineering
# Create new features
data_encoded['capital_diff'] = data_encoded['capital_gain'] - data_encoded['capital_loss']  # Capital difference feature
data_encoded['hours_per_week_bins'] = pd.cut(data_encoded['hours_per_week'], bins=[0, 30, 40, 50, 100], labels=[0, 1, 2, 3])  # Binning hours per week

# Log transformation for skewed features
data_encoded['capital_gain_log'] = np.log1p(data_encoded['capital_gain'])  # Log transform for capital_gain
data_encoded

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,capital_gain,capital_loss,hours_per_week,native_country,sex_ Male,income_ >50K,capital_diff,hours_per_week_bins,capital_gain_log
0,39,7,77516,9,13,4,1,1,4,2174,0,40,39,1,0,2174,1,7.684784
1,50,6,83311,9,13,2,4,0,4,0,0,13,39,1,0,0,0,0.000000
2,38,4,215646,11,9,0,6,1,4,0,0,40,39,1,0,0,1,0.000000
3,53,4,234721,1,7,2,6,0,2,0,0,40,39,1,0,0,1,0.000000
4,28,4,338409,9,13,2,10,5,2,0,0,40,5,0,0,0,1,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,4,257302,7,12,2,13,5,4,0,0,38,39,0,0,0,1,0.000000
32557,40,4,154374,11,9,2,7,0,4,0,0,40,39,1,1,0,1,0.000000
32558,58,4,151910,11,9,6,1,4,4,0,0,40,39,0,0,0,1,0.000000
32559,22,4,201490,11,9,4,1,3,4,0,0,20,39,1,0,0,0,0.000000


In [20]:
# Task 4: Feature Selection
# Using Isolation Forest for outlier detection
iso_forest = IsolationForest(contamination=0.1, random_state=42)
outliers = iso_forest.fit_predict(data_encoded[numerical_features])
data_filtered = data_encoded[outliers == 1]  # Keep only non-outliers
iso_forest

IsolationForest(contamination=0.1, random_state=42)

In [21]:
outliers

array([ 1,  1,  1, ...,  1,  1, -1])

In [22]:
data_filtered

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,capital_gain,capital_loss,hours_per_week,native_country,sex_ Male,income_ >50K,capital_diff,hours_per_week_bins,capital_gain_log
0,39,7,77516,9,13,4,1,1,4,2174,0,40,39,1,0,2174,1,7.684784
1,50,6,83311,9,13,2,4,0,4,0,0,13,39,1,0,0,0,0.000000
2,38,4,215646,11,9,0,6,1,4,0,0,40,39,1,0,0,1,0.000000
3,53,4,234721,1,7,2,6,0,2,0,0,40,39,1,0,0,1,0.000000
4,28,4,338409,9,13,2,10,5,2,0,0,40,5,0,0,0,1,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32555,22,4,310152,15,10,4,11,1,4,0,0,40,39,1,0,0,1,0.000000
32556,27,4,257302,7,12,2,13,5,4,0,0,38,39,0,0,0,1,0.000000
32557,40,4,154374,11,9,2,7,0,4,0,0,40,39,1,1,0,1,0.000000
32558,58,4,151910,11,9,6,1,4,4,0,0,40,39,0,0,0,1,0.000000


In [23]:
# PPS (Predictive Power Score) analysis
pps_matrix = pps.matrix(data_filtered).pivot(columns='x', index='y', values='ppscore')

In [24]:
pps_matrix

x,age,capital_diff,capital_gain,capital_gain_log,capital_loss,education,education_num,fnlwgt,hours_per_week,hours_per_week_bins,income_ >50K,marital_status,native_country,occupation,race,relationship,sex_ Male,workclass
y,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
age,1.0,0.0,0.0,0.0,0.0,0.01468193,0.01468193,0.0,0.012103,0.018836,0.03734149,0.215538,0.0,0.01847585,0.0,0.1561375,0.001277759,0.021392
capital_diff,0.0,1.0,0.988046,0.988046,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
capital_gain,0.0,0.998294,1.0,0.998294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
capital_gain_log,0.0,0.999567,0.999567,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
capital_loss,0.0,0.936657,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
education,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
education_num,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.017654,0.0,0.0,0.1445132,0.0,0.0,0.0,0.0
fnlwgt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006627772,0.0,0.0,0.0
hours_per_week,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.640318,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
hours_per_week_bins,0.104002,0.008701,0.008623,0.008623,8.7e-05,1.057638e-07,1.057638e-07,0.0,0.999664,1.0,1.057638e-07,0.0,0.004033,1.057638e-07,1.057638e-07,1.057638e-07,1.057638e-07,0.00144


In [25]:
# Correlation matrix for comparison
correlation_matrix = data_filtered.corr()
correlation_matrix

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,capital_gain,capital_loss,hours_per_week,native_country,sex_ Male,income_ >50K,capital_diff,capital_gain_log
age,1.0,0.025046,-0.075886,0.001255,0.040751,-0.304068,-0.008613,-0.263274,0.026422,0.073948,0.017349,0.134029,-0.001155,0.087805,0.236144,0.073353,0.073646
workclass,0.025046,1.0,-0.018102,0.016205,0.044832,-0.056807,0.241394,-0.096251,0.048669,0.009826,0.00267,0.117038,-0.001247,0.093257,0.033931,0.009735,0.006091
fnlwgt,-0.075886,-0.018102,1.0,-0.014886,-0.033452,0.027059,-0.002773,0.00939,-0.02474,-0.018857,0.000989,-0.018524,-0.045791,0.027072,-0.014742,-0.018874,-0.01785
education,0.001255,0.016205,-0.014886,1.0,0.335076,-0.027905,-0.028106,-0.00773,0.014927,0.014911,0.008165,0.038441,0.059263,-0.03184,0.064153,0.014647,0.015224
education_num,0.040751,0.044832,-0.033452,0.335076,1.0,-0.046753,0.104722,-0.087416,0.025975,0.092891,0.011332,0.128404,0.037359,0.000928,0.297777,0.092468,0.071153
marital_status,-0.304068,-0.056807,0.027059,-0.027905,-0.046753,1.0,-0.004397,0.182624,-0.067678,-0.060035,-0.017672,-0.192486,-0.020511,-0.115723,-0.190545,-0.059441,-0.056126
occupation,-0.008613,0.241394,-0.002773,-0.028106,0.104722,-0.004397,1.0,-0.076853,0.007661,0.015922,-0.001015,0.064949,-0.010536,0.082112,0.065321,0.015941,0.011118
relationship,-0.263274,-0.096251,0.00939,-0.00773,-0.087416,0.182624,-0.076853,1.0,-0.117904,-0.072033,0.01005,-0.265124,-0.003454,-0.575746,-0.242569,-0.07229,-0.065536
race,0.026422,0.048669,-0.02474,0.014927,0.025975,-0.067678,0.007661,-0.117904,1.0,0.014827,0.005732,0.046646,0.136182,0.087384,0.07029,0.014638,0.018098
capital_gain,0.073948,0.009826,-0.018857,0.014911,0.092891,-0.060035,0.015922,-0.072033,0.014827,1.0,-0.009268,0.057484,0.017643,0.053649,0.272243,0.999518,0.884631


In [26]:
# Display results
print("Standard Scaled Data:")
print(data_standard_scaled.head())

Standard Scaled Data:
        age          workclass    fnlwgt   education  education_num  \
0  0.030671          State-gov -1.063611   Bachelors       1.134739   
1  0.837109   Self-emp-not-inc -1.008707   Bachelors       1.134739   
2 -0.042642            Private  0.245079     HS-grad      -0.420060   
3  1.057047            Private  0.425801        11th      -1.197459   
4 -0.775768            Private  1.408176   Bachelors       1.134739   

        marital_status          occupation    relationship    race      sex  \
0        Never-married        Adm-clerical   Not-in-family   White     Male   
1   Married-civ-spouse     Exec-managerial         Husband   White     Male   
2             Divorced   Handlers-cleaners   Not-in-family   White     Male   
3   Married-civ-spouse   Handlers-cleaners         Husband   Black     Male   
4   Married-civ-spouse      Prof-specialty            Wife   Black   Female   

   capital_gain  capital_loss  hours_per_week  native_country  income  
0   

In [27]:
print("\nMin-Max Scaled Data:")
print(data_min_max_scaled.head())


Min-Max Scaled Data:
        age          workclass    fnlwgt   education  education_num  \
0  0.301370          State-gov  0.044302   Bachelors       0.800000   
1  0.452055   Self-emp-not-inc  0.048238   Bachelors       0.800000   
2  0.287671            Private  0.138113     HS-grad       0.533333   
3  0.493151            Private  0.151068        11th       0.400000   
4  0.150685            Private  0.221488   Bachelors       0.800000   

        marital_status          occupation    relationship    race      sex  \
0        Never-married        Adm-clerical   Not-in-family   White     Male   
1   Married-civ-spouse     Exec-managerial         Husband   White     Male   
2             Divorced   Handlers-cleaners   Not-in-family   White     Male   
3   Married-civ-spouse   Handlers-cleaners         Husband   Black     Male   
4   Married-civ-spouse      Prof-specialty            Wife   Black   Female   

   capital_gain  capital_loss  hours_per_week  native_country  income  
0   

In [28]:
print("\nEncoded Data:")
print(data_encoded.head())


Encoded Data:
   age  workclass  fnlwgt  education  education_num  marital_status  \
0   39          7   77516          9             13               4   
1   50          6   83311          9             13               2   
2   38          4  215646         11              9               0   
3   53          4  234721          1              7               2   
4   28          4  338409          9             13               2   

   occupation  relationship  race  capital_gain  capital_loss  hours_per_week  \
0           1             1     4          2174             0              40   
1           4             0     4             0             0              13   
2           6             1     4             0             0              40   
3           6             0     2             0             0              40   
4          10             5     2             0             0              40   

   native_country  sex_ Male  income_ >50K  capital_diff hours_per_week

In [29]:
print("\nFiltered Data (Outliers Removed):")
print(data_filtered.head())


Filtered Data (Outliers Removed):
   age  workclass  fnlwgt  education  education_num  marital_status  \
0   39          7   77516          9             13               4   
1   50          6   83311          9             13               2   
2   38          4  215646         11              9               0   
3   53          4  234721          1              7               2   
4   28          4  338409          9             13               2   

   occupation  relationship  race  capital_gain  capital_loss  hours_per_week  \
0           1             1     4          2174             0              40   
1           4             0     4             0             0              13   
2           6             1     4             0             0              40   
3           6             0     2             0             0              40   
4          10             5     2             0             0              40   

   native_country  sex_ Male  income_ >50K  capital

In [30]:
print("\nPPS Matrix:")
print(pps_matrix)


PPS Matrix:
x                         age  capital_diff  capital_gain  capital_gain_log  \
y                                                                             
age                  1.000000      0.000000      0.000000          0.000000   
capital_diff         0.000000      1.000000      0.988046          0.988046   
capital_gain         0.000000      0.998294      1.000000          0.998294   
capital_gain_log     0.000000      0.999567      0.999567          1.000000   
capital_loss         0.000000      0.936657      0.000000          0.000000   
education            0.000000      0.000000      0.000000          0.000000   
education_num        0.000000      0.000000      0.000000          0.000000   
fnlwgt               0.000000      0.000000      0.000000          0.000000   
hours_per_week       0.000000      0.000000      0.000000          0.000000   
hours_per_week_bins  0.104002      0.008701      0.008623          0.008623   
income_ >50K         0.000000      0.00

In [31]:
print("\nCorrelation Matrix:")
print(correlation_matrix)


Correlation Matrix:
                       age  workclass    fnlwgt  education  education_num  \
age               1.000000   0.025046 -0.075886   0.001255       0.040751   
workclass         0.025046   1.000000 -0.018102   0.016205       0.044832   
fnlwgt           -0.075886  -0.018102  1.000000  -0.014886      -0.033452   
education         0.001255   0.016205 -0.014886   1.000000       0.335076   
education_num     0.040751   0.044832 -0.033452   0.335076       1.000000   
marital_status   -0.304068  -0.056807  0.027059  -0.027905      -0.046753   
occupation       -0.008613   0.241394 -0.002773  -0.028106       0.104722   
relationship     -0.263274  -0.096251  0.009390  -0.007730      -0.087416   
race              0.026422   0.048669 -0.024740   0.014927       0.025975   
capital_gain      0.073948   0.009826 -0.018857   0.014911       0.092891   
capital_loss      0.017349   0.002670  0.000989   0.008165       0.011332   
hours_per_week    0.134029   0.117038 -0.018524   0.038