Первоначальный код от Саши Зырянова. Приведен в порядок, обеспечена линейность выполнения.

Содержание: удалена большая часть признаков, категориальные пропущены через one-hot encoder, созданы признаки count, добавлено сравнение job1Position с некоторыми ключевыми словами с помощью w2v, признак profession приведен к менее разрозненному виду и отфильтрован по признаку count > 50.

На полученных признаках произведено моделирование и посчитано MAE и MAE без 10% худших предскзаний:
  для разных моделей: RandomForest, LinearRegression и Lasso
  для кандидатской и ассессорской целевых переменных

In [1]:
from string import punctuation

import pandas as pd

import numpy as np

from sklearn.preprocessing import OneHotEncoder

from sklearn.ensemble import RandomForestRegressor
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

from sklearn.metrics import mean_absolute_error

from scipy.spatial.distance import cosine

from gensim.models import KeyedVectors

import pymorphy2

%matplotlib inline

In [2]:
def count_ecoder(feature):
    counts = feature.value_counts()
    
    return feature.map(counts)

In [12]:
assessor_data = pd.read_csv('../data/raw/assessor_train.csv', index_col='Unnamed: 0')

In [4]:
assessor_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10124 entries, 0 to 10123
Data columns (total 58 columns):
idProfessionTree                10123 non-null float64
rating                          10124 non-null int64
payment                         10124 non-null int64
town                            10124 non-null int64
region                          10124 non-null int64
citizenship                     4762 non-null float64
typeOfWork                      10124 non-null int64
placeOfWork                     10124 non-null int64
age                             10124 non-null int64
educationLevel                  10124 non-null int64
computerSkillsLevel             10124 non-null int64
readyToRelocate                 10124 non-null int64
readyToBusinessTrips            10124 non-null int64
keySkills                       7994 non-null object
publishedAt                     10124 non-null int64
profession                      10124 non-null object
experienceMonthTotal            10124 n

In [5]:
assessor_data = assessor_data.drop(['region',
                                    'idProfessionTree',
                                    'driverLicenseA',
                                    'driverLicenseB',
                                    'driverLicenseC',
                                    'driverLicenseD',
                                    'driverLicenseE',
                                    'catalog1',
                                    'catalog2',
                                    'catalog3',
                                    'catalog4',
                                    'catalog5',
                                    'eduHigher1Institute',
                                    'eduHigher1Faculty',
                                    'eduHigher1Speciality',
                                    'eduHigher2Institute',
                                    'eduHigher2Faculty',
                                    'eduHigher2Speciality',
                                    'job1CompanyName',
                                    'job1Description',
                                    'job2Position',
                                    'job2CompanyName',
                                    'job2Description',
                                    'eduDoctorateCount',
                                    'eduPhDCount',
                                    'keySkills',
                                    'publishedAt'], axis=1)

In [6]:
assessor_data['married'] = assessor_data['married'].factorize()[0] + 1
assessor_data['hasKids'] = assessor_data['hasKids'].factorize()[0] + 1
assessor_data['gender'] = assessor_data['gender'].factorize()[0]
assessor_data['currentlyEmployed'] = assessor_data['currentlyEmployed'].factorize()[0]

In [7]:
assessor_data['citizenship'] = assessor_data['citizenship'].notnull()
assessor_data['typeOfWork'] = assessor_data['typeOfWork'] != 6

In [8]:
is_capital = (assessor_data['town'] == 4) | (assessor_data['town'] == 14)
assessor_data = assessor_data[is_capital]
assessor_data['town'], _ = assessor_data['town'].factorize()

In [9]:
assessor_data['age_count'] = count_ecoder(assessor_data['age'])
assessor_data['educationLevel_count'] = count_ecoder(assessor_data['educationLevel'])
assessor_data['computerSkillsLevel_count'] = count_ecoder(assessor_data['computerSkillsLevel'])
assessor_data['bestLanguageId_count'] = count_ecoder(assessor_data['bestLanguageId'])
assessor_data['bestLanguageLevel_count'] = count_ecoder(assessor_data['bestLanguageLevel'])

assessor_data = assessor_data.drop('bestLanguageId', axis=1)

In [10]:
ohe_features = ['readyToBusinessTrips', 'married', 'hasKids']

enc = OneHotEncoder()
assessor_data_ohe = enc.fit_transform(assessor_data[ohe_features])
assessor_data_ohe = assessor_data_ohe.toarray()
assessor_data_ohe = pd.DataFrame(assessor_data_ohe, index=assessor_data.index)
assessor_data_ohe.rename(columns={0:'readyToBusinessTrips0', 1:'readyToBusinessTrips1', 2:'readyToBusinessTrips2',
                                  3:'married_nan', 4:'married_no', 5:'married_yes',
                                  6:'hasKids_nan', 7:'hasKids_no', 8:'hasKids_yes'}, inplace=True)

assessor_data = pd.concat([assessor_data, assessor_data_ohe], axis=1)
assessor_data = assessor_data.drop(ohe_features, axis=1)

In [11]:
assessor_data['job1Position'] = assessor_data['job1Position'].fillna('менеджер по продажам')

In [12]:
assessor_data['job1Position'] = assessor_data['job1Position'].str.lower()

In [13]:
drop_punctuation_table = dict.fromkeys(map(ord, punctuation), ' ')

assessor_data['job1Position'] = assessor_data['job1Position'].str.translate(drop_punctuation_table)

In [14]:
assessor_data['job1Position'] = assessor_data['job1Position'].str.split()

In [15]:
def prepare_job_poisition(words, morph):
    out = []
    for word in words:
        parsed_word = morph.parse(word)[0]
        
        pos = parsed_word.tag.POS
        if pos != 'NOUN':
            continue
            
        normal_form = parsed_word.normal_form
        
        out.append(normal_form + '_' + pos)
        
    return out

assessor_data['job1Position'] = assessor_data['job1Position'].apply(prepare_job_poisition, morph=pymorphy2.MorphAnalyzer())

In [16]:
w2v = KeyedVectors.load_word2vec_format('../models/ruwikiruscorpora_0_300_20.bin', binary=True)

In [17]:
def match_job_position(words, w2v, match_vector):
    job_vector = None
    for word in words:
        try:
            word_wector = w2v[word]
        except KeyError:
            continue
            
        if job_vector is None:
            job_vector = word_wector
        else:    
            job_vector += word_wector
            
    if job_vector is None:
        return None
            
    job_vector /= len(words)
    
    return cosine(job_vector, match_vector)

manager_vector = w2v['менеджер_NOUN']
chief_vector = w2v['руководитель_NOUN']
sales_vector = w2v['продажа_NOUN']

assessor_data['job1PositionMatchManager'] = assessor_data['job1Position'].apply(match_job_position,
                                                                                w2v=w2v,
                                                                                match_vector=manager_vector)

assessor_data['job1PositionMatchChief'] = assessor_data['job1Position'].apply(match_job_position,
                                                                              w2v=w2v,
                                                                              match_vector=chief_vector)

assessor_data['job1PositionMatchSales'] = assessor_data['job1Position'].apply(match_job_position,
                                                                              w2v=w2v,
                                                                              match_vector=sales_vector)

In [18]:
assessor_data = assessor_data.drop('job1Position', axis=1)

In [19]:
X = assessor_data.drop(['profession', 'payment', 'asessor'], axis=1)
y = assessor_data['payment']

X = X.fillna(-1)

In [20]:
cv = KFold(4, True, 42)

In [21]:
#модель с отбрасыванием признака 'profession', целевая переменная payment

rfr = RandomForestRegressor(n_estimators=100, n_jobs=7)

scores = cross_val_score(rfr, X, y, cv=cv, scoring='neg_mean_absolute_error')

print(scores.mean(), scores.std())

-13922.2826022 131.90205079


In [13]:
assessor_data['profession'] = assessor_data['profession'].str.lower()
assessor_data['profession'] = assessor_data['profession'].str.replace('продаже', 'продажам')
assessor_data['profession'] = assessor_data['profession'].str.replace('менеджера', 'менеджер')
assessor_data['profession'] = assessor_data['profession'].str.replace(' / ', ',')

is_equipment = assessor_data['profession'].str.find('оборудован') >= 0
is_furniture = assessor_data['profession'].str.find('мебел') >= 0
is_internet = assessor_data['profession'].str.find('интернет') >= 0
is_services = assessor_data['profession'].str.find('услуг') >= 0
is_client = assessor_data['profession'].str.find('клиент') >= 0

assessor_data.loc[is_equipment, 'profession'] = 'менеджер по продажам оборудования'
assessor_data.loc[is_furniture, 'profession'] = 'менеджер по продажам мебели'
assessor_data.loc[is_internet, 'profession'] = 'менеджер по продажам интернет-магазина'
assessor_data.loc[is_services, 'profession'] = 'менеджер по продажам услуг'
assessor_data.loc[is_client, 'profession'] = 'менеджер по продажам и работе с клиентами'

splitted_professions = assessor_data['profession'].str.split(',')
assessor_data['profession'] = splitted_professions.apply(lambda x: x[0].strip())

mapped_professions = assessor_data['profession'].map({    
    'менеджер по продажам новостроек': 'менеджер по продажам недвижимости',
    'менеджер по продажам загородной недвижимости': 'менеджер по продажам недвижимости',
    'менеджер по продажам первичной недвижимости': 'менеджер по продажам недвижимости',
    'менеджер по продажам строящейся недвижимости': 'менеджер по продажам недвижимости',
    'менеджер по продажам недвижимости с обучением': 'менеджер по продажам недвижимости',
    'менеджер по продажам и аренде недвижимости': 'менеджер по продажам недвижимости',
    'менеджер по продажам зарубежной недвижимости': 'менеджер по продажам недвижимости',
    
    'менеджер по продажам запчастей': 'менеджер по продажам автозапчастей',
    'менеджер по продажам запасных частей': 'менеджер по продажам автозапчастей',
    
    'менеджер по продажам автомобилей с пробегом': 'менеджер по продажам автомобилей',
    'менеджер по продажам автомобилей (официальный дилер)': 'менеджер по продажам автомобилей',
    'менеджер по продажам новых автомобилей': 'менеджер по продажам автомобилей',
    
    'менеджер по продажам в отдел кредитования': 'менеджер по продажам банковских продуктов и услуг',
    'менеджер по продажам в банк': 'менеджер по продажам банковских продуктов и услуг',
    'менеджер по продажам банковских услуг': 'менеджер по продажам банковских продуктов и услуг',
    'менеджер по продажам банковских продуктов': 'менеджер по продажам банковских продуктов и услуг',
    
    'менеджер по продажам керамической плитки': 'менеджер по продажам строительных материалов',
    'менеджер по продажам напольных покрытий': 'менеджер по продажам строительных материалов',
    'менеджер по продажам стройматериалов': 'менеджер по продажам строительных материалов',
    'менеджер по продажам кровельных материалов': 'менеджер по продажам строительных материалов',
    'менеджер по продажам бетона': 'менеджер по продажам строительных материалов',
})

assessor_data.loc[mapped_professions.notnull(), 'profession'] = mapped_professions[mapped_professions.notnull()]

profession_counts = assessor_data['profession'].value_counts()
good_professions = profession_counts.index[profession_counts > 50]
is_good_profession = assessor_data['profession'].isin(good_professions)
assessor_data = assessor_data[is_good_profession]

assessor_data['profession_counts'] = count_ecoder(assessor_data['profession'])

In [22]:
enc = OneHotEncoder()
assessor_data['profession_fact'], _ = assessor_data['profession'].factorize()
profession_ohe = enc.fit_transform(assessor_data['profession_fact'].values.reshape(-1, 1))
profession_ohe = profession_ohe.toarray()
profession_ohe = pd.DataFrame(profession_ohe, index = assessor_data.index)

profession_names = []

for i in range(profession_ohe.shape[1]):
    profession_names.append(assessor_data.loc[(profession_ohe[i] == 1), 'profession'].iloc[0])
    
profession_ohe.rename(columns={0:'prof_0', 1:'prof_1', 2:'prof_2', 3:'prof_3', 4:'prof_4', 5:'prof_5', 6:'prof_6', 7:'prof_7', 8:'prof_8', 9:'prof_9'}, inplace=True)
assessor_data = pd.concat([assessor_data, profession_ohe], axis=1)
assessor_data = assessor_data.drop(['profession', 'profession_fact'], axis=1)

In [24]:
profession_names

['менеджер по продажам строительных материалов',
 'менеджер по продажам',
 'менеджер по продажам услуг',
 'менеджер по продажам и работе с клиентами',
 'менеджер по продажам автозапчастей',
 'менеджер по продажам оборудования',
 'менеджер по продажам интернет-магазина',
 'менеджер по продажам автомобилей',
 'менеджер по продажам недвижимости',
 'менеджер по продажам мебели']

In [24]:
assessor_data.to_csv('../data/processed/processed_train.csv')

In [207]:
def mae(est, X, y):
    y_pred = est.predict(X)
    err = np.abs(y - y_pred) / y
    
    return err.mean()

In [208]:
def mae_throw_worst(est, X, y):
    y_pred = est.predict(X)
    err = np.sort(np.abs(y - y_pred))
    
    return -err[:-int(err.shape[0]*0.1)].mean()

In [209]:
index = ['Candidate_DummyRegr', 'Candidate_RandForRegr', 'Candidate_LinRegr', 'Candidate_Lasso',
        'Assessor_DummyRegr', 'Assessor_RandForRegr', 'Assessor_LinRegr', 'Assessor_Lasso']
columns = ['MAE', 'MAE_throw_worst']
all_scores = pd.DataFrame(index=index, columns=columns)

# Разные модели для кандидатской целевой переменной

In [None]:
cv = KFold(4, True, 42)

In [None]:
X = assessor_data.drop(['payment', 'asessor'], axis=1)
y = assessor_data['payment']

X = X.fillna(-1)

In [214]:
#модель с учетом признака 'profession', целевая переменная payment

dre = DummyRegressor()

scores = cross_val_score(dre, X.values, y.values, cv=cv, scoring='neg_mean_absolute_error')

all_scores.loc['Candidate_DummyRegr', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

-15010.5228948 139.414619445


In [215]:
scores = cross_val_score(dre, X.values, y.values, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Candidate_DummyRegr', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

-11657.9283164 146.353660707


In [216]:
rfr = RandomForestRegressor(n_estimators=1000, n_jobs=7)

scores = cross_val_score(rfr, X.values, y.values, cv=cv, scoring='neg_mean_absolute_error', verbose=3)

all_scores.loc['Candidate_RandForRegr', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

[CV]  ................................................................
[CV] ............................ , score=-13467.195798, total=  34.9s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   34.9s remaining:    0.0s


[CV] ............................ , score=-13534.959613, total=  35.0s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  1.2min remaining:    0.0s


[CV] ............................ , score=-13118.783796, total=  35.2s
[CV]  ................................................................
[CV] ............................ , score=-13382.292183, total=  37.1s
-13375.8078474 157.943385094


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  2.4min finished


In [217]:
scores = cross_val_score(rfr, X.values, y.values, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Candidate_RandForRegr', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

-10242.7083884 198.588004343


In [218]:
lrm = LinearRegression()

scores = cross_val_score(lrm, X, y, cv=cv, scoring='neg_mean_absolute_error', verbose=3)

all_scores.loc['Candidate_LinRegr', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

[CV]  ................................................................
[CV] ............................ , score=-13339.023120, total=   0.0s
[CV]  ................................................................
[CV] ............................ , score=-13466.741522, total=   0.0s
[CV]  ................................................................
[CV] ............................ , score=-12744.348952, total=   0.0s
[CV]  ................................................................
[CV] ............................ , score=-13258.691950, total=   0.0s
-13202.2013858 274.554960407


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s finished


In [219]:
scores = cross_val_score(lrm, X, y, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Candidate_LinRegr', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

-10050.1119336 291.526570803


In [220]:
lasso = Lasso()

scores = cross_val_score(lasso, X, y, cv=cv, scoring='neg_mean_absolute_error', verbose=3)

all_scores.loc['Candidate_Lasso', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

[CV]  ................................................................
[CV] ............................ , score=-13337.802590, total=   0.1s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV] ............................ , score=-13465.610331, total=   0.1s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s


[CV] ............................ , score=-12743.948619, total=   0.1s
[CV]  ................................................................
[CV] ............................ , score=-13257.417872, total=   0.2s
-13201.1948526 274.23182034


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.8s finished


In [221]:
scores = cross_val_score(lasso, X, y, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Candidate_Lasso', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

-10049.089017 291.388479227


# Разные модели для ассессорской целевой переменной

In [222]:
X_without_assesor = X[assessor_data['asessor'].isnull()]
y_without_assesor = y[assessor_data['asessor'].isnull()] #целевая переменная payment

X_with_assesor = X[assessor_data['asessor'].notnull()]
y_with_assesor = y[assessor_data['asessor'].notnull()] #целевая переменная payment

y_test = assessor_data.loc[assessor_data['asessor'].notnull(), 'asessor'] #целевая переменная asessor

In [223]:
#модель обучается на 100 объектах, целевая переменная asessor

rfr = RandomForestRegressor(n_estimators=1000, n_jobs=7)

scores = cross_val_score(rfr, X_with_assesor, y_test, cv=cv, scoring='neg_mean_absolute_error', verbose=3)

all_scores.loc['Assessor_RandForRegr', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

[CV]  ................................................................
[CV] ............................. , score=-6818.500000, total=   1.3s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


[CV] ............................ , score=-12034.750000, total=   1.1s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.5s remaining:    0.0s


[CV] ............................. , score=-8603.500000, total=   1.2s
[CV]  ................................................................
[CV] ............................ , score=-10635.000000, total=   1.1s
-9522.9375 1981.49139204


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    5.0s finished


In [224]:
scores = cross_val_score(rfr, X_with_assesor, y_test, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Assessor_RandForRegr', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

In [225]:
dre = DummyRegressor()

scores = cross_val_score(dre, X_with_assesor, y_test, cv=cv, scoring='neg_mean_absolute_error')

all_scores.loc['Assessor_DummyRegr', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

-11564.5833333 3208.11890517


In [226]:
scores = cross_val_score(dre, X_with_assesor, y_test, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Assessor_DummyRegr', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

-9136.57407407 2071.5859642


In [227]:
scores = cross_val_score(lrm, X_with_assesor, y_test, cv=cv, scoring='neg_mean_absolute_error')

all_scores.loc['Assessor_LinRegr', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

-22444.8176661 2414.53048838


In [228]:
scores = cross_val_score(lrm, X_with_assesor, y_test, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Assessor_LinRegr', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

-17659.9086476 2423.42000358


In [229]:
lasso = Lasso(tol=1)

scores = cross_val_score(lasso, X_with_assesor, y_test, cv=cv, scoring='neg_mean_absolute_error')

all_scores.loc['Assessor_Lasso', 'MAE'] = scores.mean()

print(scores.mean(), scores.std())

-14388.6143567 3508.71474375


In [230]:
scores = cross_val_score(lasso, X_with_assesor, y_test, cv=cv, scoring=mae_throw_worst)

all_scores.loc['Assessor_Lasso', 'MAE_throw_worst'] = scores.mean()

print(scores.mean(), scores.std())

-11938.2811568 3574.14380093


In [231]:
all_scores

Unnamed: 0,MAE,MAE_throw_worst
Candidate_DummyRegr,-15010.5,-11657.9
Candidate_RandForRegr,-13375.8,-10242.7
Candidate_LinRegr,-13202.2,-10050.1
Candidate_Lasso,-13201.2,-10049.1
Assessor_DummyRegr,-11564.6,-9136.57
Assessor_RandForRegr,-9522.94,-7286.88
Assessor_LinRegr,-22444.8,-17659.9
Assessor_Lasso,-14388.6,-11938.3


In [122]:
#clf = RandomForestRegressor(n_estimators=1000, n_jobs=7)

In [232]:
rfr.fit(X, y)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=1000, n_jobs=7, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [233]:
feat_imp_pay = pd.DataFrame(rfr.feature_importances_, index=X.columns, columns=['Importance'])
feat_imp_pay.sort_values('Importance', inplace=True, ascending=False)
feat_imp_pay = feat_imp_pay[feat_imp_pay['Importance'] > 0.01]
feat_imp_pay

Unnamed: 0,Importance
job1PositionMatchChief,0.085196
experienceMonthTotal,0.079939
job1PositionMatchSales,0.075685
age,0.056806
jobDurationMedian,0.054247
job1DurationMonths,0.054142
job2DurationMonths,0.047068
unemployedNMonths,0.045288
town,0.043539
rating,0.039704


In [234]:
rfr.fit(X_with_assesor, y_test)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=1000, n_jobs=7, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [235]:
feat_imp_assessor = pd.DataFrame(rfr.feature_importances_, index=X_with_assesor.columns, columns=['Importance'])
feat_imp_assessor.sort_values('Importance', inplace=True, ascending=False)
feat_imp_assessor = feat_imp_assessor[feat_imp_assessor['Importance'] > 0.01]
feat_imp_assessor

Unnamed: 0,Importance
job1PositionMatchChief,0.167733
rating,0.125865
job1PositionMatchSales,0.080539
age_count,0.068185
eduHigher1YearsSinceFinished,0.067909
computerSkillsLevel_count,0.056291
computerSkillsLevel,0.053809
experienceMonthTotal,0.046858
job1DurationMonths,0.045778
job2DurationMonths,0.035165
