Merge pull request #242 from dssg/ranking

Ranking. Thanks, @k1aus!
dssg · Mar 23, 2018 · 8360180 · 8360180
2 parents bb6beb7 + fbb5db8
commit 8360180
Show file tree

Hide file tree

Showing 2 changed files with 48 additions and 3 deletions.
diff --git a/eis/run_models.py b/eis/run_models.py
@@ -319,7 +319,7 @@ def train_test_models(self, train_matrix_uuid, model_ids_generator, model_storag
                         test_matrix=test_df.iloc[:, :-1],
                         model_id=trained_model_id,
                         test_date=test_date,
-                        n_ranks=30)
+                        n_ranks=200)
                 else:
                     log.info('Generate Evaluations for model_id: {}'.format(trained_model_id))
                     self.evaluations(predictions_proba, predictions_binary, test_df.iloc[:, -1], trained_model_id,
@@ -329,7 +329,7 @@ def train_test_models(self, train_matrix_uuid, model_ids_generator, model_storag
                         test_matrix=test_df.iloc[:, :-1],
                         model_id=trained_model_id,
                         test_date=test_date,
-                        n_ranks=30)
+                        n_ranks=200)
 
             # remove trained model from memory
             predictor.delete_model(trained_model_id)
@@ -381,7 +381,7 @@ def train_score_models(self, model_ids_generator, model_storage):
                     test_matrix=test_df.iloc[:, :-1],
                     model_id=trained_model_id,
                     test_date=test_date,
-                    n_ranks=30)
+                    n_ranks=200)
 
             # remove trained model from memory
             predictor.delete_model(trained_model_id)

diff --git a/generate_feature_mapping.py b/generate_feature_mapping.py
@@ -0,0 +1,45 @@
+import json
+import yaml
+import sys
+
+import pandas as pd
+
+from eis import setup_environment
+
+"""
+Example: to build the feature mapping for model_id 19932, run:
+python generate_feature_mapping.py 19932
+Output: 19932_feature_mapping.csv (also written to the production.feature_mapping table)
+"""
+
+## Setup connection
+engine = setup_environment.get_connection_from_profile(config_file_name="default_profile.yaml")
+
+model_id = sys.argv[1]
+
+## Load the features_config
+with open('eis/features/features_descriptions.yaml') as f:
+	features_config = yaml.load(f)
+
+## Get the feature names you are trying to map
+feature_query = "SELECT feature FROM production.feature_importances WHERE model_id = {}".format(model_id)
+feature_names = pd.read_sql(feature_query, engine)
+
+## Build queries 
+def get_query(feature_name):
+	query = """SELECT * FROM public.get_feature_complete_description('{feature}',
+			'{feature_names}'::JSON, '{time_aggregations}'::JSON, '{metrics}'::JSON)""".format(feature=feature_name,feature_names=json.dumps(features_config['feature_names']), time_aggregations = json.dumps(features_config['time_aggregations']), metrics = json.dumps(features_config['metrics_name']))
+	return query
+
+list_of_dfs = []
+for i in range(len(feature_names)):
+	list_of_dfs.append(pd.read_sql(get_query(feature_names.feature[i]), engine))
+
+## Concat the dfs into one df
+feature_mapping = pd.concat(list_of_dfs, axis=0, ignore_index=True)
+
+## Write to csv
+feature_mapping.to_csv(str(model_id)+'_feature_mapping.csv', index=False, quotechar='|')
+
+## Write to database
+feature_mapping.to_sql("feature_mapping", engine, if_exists="replace", schema="production", index=False)