Skip to content

Commit

Permalink
Merge pull request #242 from dssg/ranking
Browse files Browse the repository at this point in the history
Ranking
Thanks @k1aus !
  • Loading branch information
jtwalsh0 committed Mar 23, 2018
2 parents bb6beb7 + fbb5db8 commit 8360180
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 3 deletions.
6 changes: 3 additions & 3 deletions eis/run_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def train_test_models(self, train_matrix_uuid, model_ids_generator, model_storag
test_matrix=test_df.iloc[:, :-1],
model_id=trained_model_id,
test_date=test_date,
n_ranks=30)
n_ranks=200)
else:
log.info('Generate Evaluations for model_id: {}'.format(trained_model_id))
self.evaluations(predictions_proba, predictions_binary, test_df.iloc[:, -1], trained_model_id,
Expand All @@ -329,7 +329,7 @@ def train_test_models(self, train_matrix_uuid, model_ids_generator, model_storag
test_matrix=test_df.iloc[:, :-1],
model_id=trained_model_id,
test_date=test_date,
n_ranks=30)
n_ranks=200)

# remove trained model from memory
predictor.delete_model(trained_model_id)
Expand Down Expand Up @@ -381,7 +381,7 @@ def train_score_models(self, model_ids_generator, model_storage):
test_matrix=test_df.iloc[:, :-1],
model_id=trained_model_id,
test_date=test_date,
n_ranks=30)
n_ranks=200)

# remove trained model from memory
predictor.delete_model(trained_model_id)
Expand Down
45 changes: 45 additions & 0 deletions generate_feature_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import json
import yaml
import sys

import pandas as pd

from eis import setup_environment

"""
Example: To run with model_id 19932
python generate_feature_mapping.py 19932
Output: 19932_feature_mapping.csv
"""

## Setup connection
engine = setup_environment.get_connection_from_profile(config_file_name="default_profile.yaml")

## Read the model id from the command line
# Fail with a usage hint instead of a bare IndexError when the argument is missing.
if len(sys.argv) < 2:
    sys.exit("Usage: python generate_feature_mapping.py <model_id>")
model_id = sys.argv[1]

## Load the features_config
# safe_load only builds plain data types. yaml.load() without an explicit
# Loader is deprecated since PyYAML 5.1 and raises TypeError on PyYAML >= 6.
with open('eis/features/features_descriptions.yaml') as f:
    features_config = yaml.safe_load(f)

## Get the feature names you are trying to map
# model_id comes straight from the command line and is spliced into the SQL
# text, so force it through int(): anything that is not an integer raises
# ValueError here instead of reaching the database.
feature_query = "SELECT feature FROM production.feature_importances WHERE model_id = {}".format(int(model_id))
feature_names = pd.read_sql(feature_query, engine)

## Build queries
def _sql_literal(value):
    """Escape ``value`` for use inside a single-quoted SQL string literal."""
    # Doubling single quotes is the standard SQL way to embed a quote in a
    # quoted literal; without it an apostrophe ends the literal early.
    return str(value).replace("'", "''")


def get_query(feature_name):
    """Build the SQL statement that describes a single feature.

    Args:
        feature_name: name of the feature to look up.

    Returns:
        str: a SELECT calling public.get_feature_complete_description with the
        feature name and the 'feature_names', 'time_aggregations' and
        'metrics_name' entries of the module-level ``features_config``, each
        serialized to JSON and cast with ::JSON.
    """
    template = (
        "SELECT * FROM public.get_feature_complete_description('{feature}',\n"
        "'{feature_names}'::JSON, '{time_aggregations}'::JSON, '{metrics}'::JSON)"
    )
    # Every interpolated value sits inside '...' in the template, so each one
    # is escaped; feature descriptions may legitimately contain apostrophes.
    return template.format(
        feature=_sql_literal(feature_name),
        feature_names=_sql_literal(json.dumps(features_config['feature_names'])),
        time_aggregations=_sql_literal(json.dumps(features_config['time_aggregations'])),
        metrics=_sql_literal(json.dumps(features_config['metrics_name'])),
    )

## Run one description query per feature
# Iterate over the column values directly instead of indexing by position.
list_of_dfs = [
    pd.read_sql(get_query(feature_name), engine)
    for feature_name in feature_names['feature']
]

# pd.concat raises "No objects to concatenate" on an empty list; stop with a
# message that names the real problem before anything is written.
if not list_of_dfs:
    sys.exit("No features found in production.feature_importances for model_id {}".format(model_id))

## Concat the dfs into one df
feature_mapping = pd.concat(list_of_dfs, axis=0, ignore_index=True)

## Write to csv
feature_mapping.to_csv(str(model_id)+'_feature_mapping.csv', index=False, quotechar='|')

# write to database
# if_exists="replace" drops and recreates production.feature_mapping on each run.
feature_mapping.to_sql("feature_mapping", engine, if_exists="replace", schema="production", index=False)

0 comments on commit 8360180

Please sign in to comment.