**Python Version Requirement:** Python 3.6

In [2]:
import datamart
import datamart_nyu
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, \
    mean_squared_log_error, median_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pathlib import Path
import subprocess
import os
import shutil
import json
import time

ModuleNotFoundError: No module named 'datamart'

In [2]:
# Root of a local Datamart checkout. get_materialize_info() looks for
# <DATAMART_PATH>/volumes/datasets/<dataset id>/main.csv when a search result
# has no direct download URL.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
DATAMART_PATH = '/Users/fchirigati/projects/d3m/datamart'

In [3]:
def train_and_test_model(data, target_variable_name):
    """Trains a random forest regressor on data and evaluates it on a
    held-out split.

    The target column is separated from the remaining columns and 33% of
    the rows are held out for testing. Features and target are both
    standardized with scalers fitted on the training rows only, so the
    error metrics are computed on the standardized target.

    Returns a dict with the keys mean_absolute_error, mean_squared_error,
    median_absolute_error and r2_score.
    """

    features = data.drop(target_variable_name, axis=1)
    target = data[target_variable_name]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.33, random_state=42
    )

    # the scalers work on 2-D input, so the target becomes a single column
    y_train_column = y_train.values.reshape(-1, 1)
    y_test_column = y_test.values.reshape(-1, 1)

    # scaling parameters are learned from the training rows only
    feature_scaler = StandardScaler().fit(X_train)
    target_scaler = StandardScaler().fit(y_train_column)
    X_train_scaled = feature_scaler.transform(X_train)
    y_train_scaled = target_scaler.transform(y_train_column)
    X_test_scaled = feature_scaler.transform(X_test)
    y_test_scaled = target_scaler.transform(y_test_column)

    # tree depth is capped at len(data.columns) - 1
    model = RandomForestRegressor(
        n_estimators=100,
        random_state=42,
        n_jobs=-1,
        max_depth=len(data.columns) - 1
    )
    model.fit(X_train_scaled, y_train_scaled.ravel())
    predictions = model.predict(X_test_scaled)

    scores = dict()
    scores['mean_absolute_error'] = mean_absolute_error(y_test_scaled, predictions)
    scores['mean_squared_error'] = mean_squared_error(y_test_scaled, predictions)
    scores['median_absolute_error'] = median_absolute_error(y_test_scaled, predictions)
    scores['r2_score'] = r2_score(y_test_scaled, predictions)
    return scores

In [4]:
def get_performance_scores(data, target_variable_name, missing_value_imputation):
    """Scores a model trained on data to predict the target variable.

    When missing_value_imputation is true, NaN values are replaced by the
    mean of their column before training; otherwise data is used as is.
    The returned metrics are the ones computed by train_and_test_model.
    """

    if not missing_value_imputation:
        return train_and_test_model(data, target_variable_name)

    # mean imputation; the original column names and index are
    # re-attached to the imputed values
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    imputed_data = pd.DataFrame(imputer.fit_transform(data))
    imputed_data.columns = data.columns
    imputed_data.index = data.index

    return train_and_test_model(imputed_data, target_variable_name)

In [5]:
def print_results(results):
    """Prints a short summary of every search result.

    For each result: its score, its dataset name and then either the
    (resource_id, column_index) pairs of the left and right join columns
    from the augmentation hint or, when there is no hint, the result id.
    A separator line closes each entry. Nothing is printed for an empty
    or missing result list.
    """
    if not results:
        return
    for result in results:
        print(result.score())
        print(result.get_json_metadata()['metadata']['name'])
        hint = result.get_augment_hint()
        if not hint:
            print(result.id())
        else:
            left_columns = [
                [(column.resource_id, column.column_index) for column in group]
                for group in hint.left_columns
            ]
            print("Left Columns: %s" % str(left_columns))
            right_columns = [
                [(column.resource_id, column.column_index) for column in group]
                for group in hint.right_columns
            ]
            print("Right Columns: %s" % str(right_columns))
        print("-------------------")

In [6]:
def get_materialize_info(results):
    """Collects, per dataset id, how the dataset can be materialized.

    Returns a dict mapping each dataset id found in results to a dict with
    the keys has_info, url and path. url is filled from the 'direct_url'
    entry of the result metadata when present; otherwise path is filled if
    <DATAMART_PATH>/volumes/datasets/<id>/main.csv exists. has_info is True
    only when one of the two was found. Only the first result of each id
    is inspected. Returns None for an empty or missing result list.
    """
    if not results:
        return
    info_by_id = {}
    for result in results:
        metadata = result.get_json_metadata()
        dataset_id = metadata['id']
        if dataset_id in info_by_id:
            continue
        entry = {'has_info': False, 'url': None, 'path': None}
        info_by_id[dataset_id] = entry

        materialize = metadata['metadata']['materialize']
        if 'direct_url' in materialize:
            entry['url'] = materialize['direct_url']
            entry['has_info'] = True
            continue

        # no URL: try to find the file on the local Datamart volumes
        local_file = os.path.join(DATAMART_PATH, 'volumes/datasets', dataset_id, 'main.csv')
        if os.path.exists(local_file):
            entry['path'] = local_file
            entry['has_info'] = True
    return info_by_id

In [43]:
def _materialize_candidate_datasets(id_to_materialize, companion_datasets_dir):
    """Downloads (or copies) every candidate dataset that is not on disk yet.

    Entries whose download or copy leaves no file, or an empty file, get
    has_info reset to False so that later steps skip them.
    """
    for id_, info in id_to_materialize.items():
        companion_dataset_path = os.path.join(companion_datasets_dir, id_)
        if os.path.exists(companion_dataset_path):
            # dataset has been downloaded before
            continue
        if not info['has_info']:
            print('%s has no materialization information for download.' % id_)
            continue

        if info['url']:
            # The URL comes from remote metadata, so it is passed as a
            # separate argument instead of being pasted into a shell
            # command: characters such as '&', ';' or spaces would
            # otherwise truncate the URL or run arbitrary commands.
            try:
                subprocess.call(['wget', '-O', companion_dataset_path, info['url']])
            except OSError as e:
                # e.g. wget is not installed; handled below as a failed download
                print('Could not run wget for %s: %s' % (id_, e))
        else:
            shutil.copyfile(info['path'], companion_dataset_path)

        materialized = os.path.exists(companion_dataset_path)
        if materialized and os.stat(companion_dataset_path).st_size <= 0:
            # an empty file is what a failed download leaves behind
            os.remove(companion_dataset_path)
            materialized = False
        if not materialized:
            print('%s has no valid materialization information for download.' % id_)
            info['has_info'] = False


def download_datasets_and_generate_training_records(results, supplied_data, supplied_data_path, target,
                                                    id_to_materialize, dir_):
    """Materializes the candidate datasets, joins each result with the query
    dataset and records model performance before and after the join.

    Parameters:
        results: search results; each one must provide get_json_metadata(),
            get_augment_hint() and augment().
        supplied_data: query dataset; its 'learningData' entry is used to
            resolve the name of the query key column and the dataset is
            passed to augment().
        supplied_data_path: path of the query dataset CSV file, read to
            compute the scores before augmentation.
        target: name of the column to predict.
        id_to_materialize: mapping produced by get_materialize_info; entries
            that cannot be materialized get has_info set to False.
        dir_: directory where joined datasets are saved as
            '<position of the result>.csv'. Candidate datasets are stored in
            the parent directory of dir_.

    Returns a list with one record (dict) per result that could be joined
    and scored, or None when results is empty. Each metric in a record is
    a two-element list: [score before augmentation, score after].

    Files already present on disk (candidate datasets, filtered candidates
    and joins) are reused instead of being regenerated.
    """
    if not results:
        return

    training_records = list()
    companion_datasets_dir = os.path.dirname(dir_)

    # downloading candidate datasets
    _materialize_candidate_datasets(id_to_materialize, companion_datasets_dir)

    # Scores before augmentation depend only on the query key that is
    # dropped (fixed file, fixed target, fixed random seeds), so they are
    # computed once per key instead of retraining a model for every result.
    baseline_scores = dict()

    for i, result in enumerate(results):
        metadata = result.get_json_metadata()
        id_ = metadata['id']
        if not id_to_materialize[id_]['has_info']:
            continue

        augment_hint = result.get_augment_hint()
        if not augment_hint:
            # without join columns there is nothing to augment with
            print('Result %d (%s) has no augmentation hint; skipping.' % (i, id_))
            continue

        # query and candidate keys
        left_column_index = augment_hint.left_columns[0][0].column_index
        right_column_index = augment_hint.right_columns[0][0].column_index
        query_key = list(supplied_data['learningData'].columns)[left_column_index]
        candidate_key = metadata['metadata']['columns'][right_column_index]['name']

        # paths
        join_path = os.path.join(dir_, '%d.csv' % i)
        original_candidate_path = os.path.join(companion_datasets_dir, id_)
        candidate_path = os.path.join(
            companion_datasets_dir,
            '%s_%s' % (id_, candidate_key.replace('%s' % os.path.sep, '_').strip())
        )

        join_ = None
        join_is_cached = os.path.exists(join_path)
        if not join_is_cached:
            # throttle only the iterations that actually send a request
            time.sleep(2)
            try:
                join_ = result.augment(
                    supplied_data=supplied_data,
                    connection_url='http://localhost:8002/'
                )

                # excluding d3mIndex
                join_['learningData'].drop(['d3mIndex'], axis=1, inplace=True)
            except Exception as e:
                # best effort: a failed augmentation only skips this result,
                # but the reason is reported instead of being dropped
                print('Result %d (%s): augmentation failed, skipping (%s)' % (i, id_, e))
                continue

        try:
            if not os.path.exists(candidate_path):
                companion_data = pd.read_csv(original_candidate_path)
                # collecting candidate key column
                candidate_key_column = companion_data[candidate_key]
                # excluding categorical / textual attributes
                companion_data = companion_data.select_dtypes(exclude=['object', 'bool'])
                if candidate_key not in companion_data.columns:
                    companion_data[candidate_key] = candidate_key_column
                # excluding columns with all NaN values
                companion_data.dropna(axis=1, how='all', inplace=True)
                # if the final dataset has only the key, ignore
                if len(companion_data.columns) < 2:
                    continue
                # saving candidate dataset
                companion_data.to_csv(candidate_path, index=False)

            if not join_is_cached:
                # need to load and save again to exclude categorical / textual attributes
                join_['learningData'].to_csv(join_path, index=False)
                joined_data = pd.read_csv(join_path)
                # collecting key column
                key_column = joined_data[query_key]
                # if key column is not unique, this means that aggregation is necessary
                #   so we ignore
                if len(set(key_column.tolist())) != len(key_column.tolist()):
                    os.remove(join_path)
                    continue
                # excluding categorical / textual attributes
                joined_data = joined_data.select_dtypes(exclude=['object', 'bool'])
                if query_key not in joined_data.columns:
                    joined_data[query_key] = key_column
                # excluding columns with all NaN values
                joined_data.dropna(axis=1, how='all', inplace=True)
                # if number of columns in joined dataset is the same as in query data,
                #   it means that there was no join (no intersection), and we ignore
                if len(supplied_data['learningData'].columns) - 1 == len(joined_data.columns):
                    os.remove(join_path)
                    continue
                joined_data.to_csv(join_path, index=False)
            else:
                joined_data = pd.read_csv(join_path)

            # scores before augmentation
            if query_key not in baseline_scores:
                baseline_scores[query_key] = get_performance_scores(
                    pd.read_csv(supplied_data_path).drop([query_key], axis=1),
                    target,
                    True
                )
            scores_query = baseline_scores[query_key]

            # scores after augmentation
            scores_query_candidate = get_performance_scores(
                joined_data.drop([query_key], axis=1),
                target,
                True
            )

            training_records.append(dict(
                query_dataset=supplied_data_path,
                query_key=query_key,
                target=target,
                candidate_dataset=os.path.abspath(candidate_path),
                candidate_key=candidate_key,
                joined_dataset=os.path.abspath(join_path),
                imputation_strategy='mean',
                mean_absolute_error=[scores_query['mean_absolute_error'],
                                     scores_query_candidate['mean_absolute_error']],
                mean_squared_error=[scores_query['mean_squared_error'],
                                    scores_query_candidate['mean_squared_error']],
                median_absolute_error=[scores_query['median_absolute_error'],
                                       scores_query_candidate['median_absolute_error']],
                r2_score=[scores_query['r2_score'],
                          scores_query_candidate['r2_score']]
            ))

        except Exception:
            # print the context needed to reproduce the failure, then let
            # the original exception propagate with its traceback intact
            print('ID: %d' % i)
            print('Join Path: %s' % join_path)
            print('Original Candidate Path: %s' % original_candidate_path)
            print('Candidate Path: %s' % candidate_path)
            print('Query Key: %s' % query_key)
            print('Candidate Key: %s' % candidate_key)
            raise

    return training_records

In [8]:
# 'companion-datasets' holds the downloaded candidate datasets and gets one
# sub-folder per problem for the joined datasets. makedirs(exist_ok=True)
# creates the parent folder when needed and tolerates folders left over from
# a previous run, without a separate existence check.
for p in ['taxi-vehicle-collision', 'ny-taxi-demand', 'college-debt', 'poverty-estimation']:
    os.makedirs('companion-datasets/%s'%p, exist_ok=True)

In [9]:
# Client for the Datamart instance at http://localhost:8002/. The same URL is
# hard-coded as connection_url in download_datasets_and_generate_training_records.
client = datamart_nyu.NYUDatamart('http://localhost:8002/')

## NY Taxi and Vehicle Collision Problem

In [10]:
# Query dataset for this problem: CSV path under the user's home directory,
# loaded through a file:// URI.
# NOTE(review): `container` is not imported in any visible cell -- presumably
# `from d3m import container`; confirm before running.
taxi_vehicle_collision_path = str(Path.home()) + '/projects/dataset-ranking/use-cases/data/taxi-vehicle-collision/' +\
       'taxi-vehicle-collision-v2.csv'
taxi_vehicle_collision = container.Dataset.load('file://' + taxi_vehicle_collision_path)

In [11]:
# preview the first rows of the query dataset
taxi_vehicle_collision['learningData'].head()

Unnamed: 0,d3mIndex,datetime,n. trips,n. collisions
0,0,2014-01-01,420810,399
1,1,2014-01-02,359958,603
2,2,2014-01-03,275470,423
3,3,2014-01-04,417499,418
4,4,2014-01-05,388542,320


In [12]:
# Query Datamart with the supplied dataset. The only query variable references
# column index 1, which is `datetime` in the preview above (index 0 is
# d3mIndex).
# NOTE(review): DatasetColumn arguments look like (resource_id, column_index)
# -- confirm against the datamart API.
cursor = client.search_with_data(
    query=datamart.DatamartQuery(
        variables=[
            datamart.TabularVariable(
                columns=[datamart.DatasetColumn('', 1)],
                relationship=datamart.ColumnRelationship.CONTAINS
            )
        ]
    ),
    supplied_data=taxi_vehicle_collision
)

In [13]:
# drain the cursor: pages are appended until an empty page comes back
taxi_vehicle_collision_results = []
while True:
    results = cursor.get_next_page()
    if not results:
        break
    taxi_vehicle_collision_results.extend(results)

In [14]:
# total number of results gathered from all pages
len(taxi_vehicle_collision_results)

1064

In [15]:
# print_results(taxi_vehicle_collision_results)

In [16]:
# per dataset id: where its data can be obtained (direct URL or local file)
taxi_vehicle_collision_info = get_materialize_info(taxi_vehicle_collision_results)

In [17]:
# Build the training records for this problem: 'n. trips' is the prediction
# target and joined datasets are written to
# companion-datasets/taxi-vehicle-collision.
taxi_vehicle_collision_training_records = download_datasets_and_generate_training_records(
    taxi_vehicle_collision_results,
    taxi_vehicle_collision,
    taxi_vehicle_collision_path,
    'n. trips',
    taxi_vehicle_collision_info,
    'companion-datasets/taxi-vehicle-collision'
)

datamart.url.0a41288b3f9256e9906062a5fd75169a has no valid materialization information for download.
datamart.upload.a031bc4968cb4838967e4709e63a0ddc has no materialization information for download.
datamart.upload.83ee6db44a3f434aa0031dc4eb266094 has no materialization information for download.
datamart.upload.469f627ada7349f285ad22d3028bc38d has no materialization information for download.
datamart.upload.c90cd58ac0c54b169580b49b387cc59e has no materialization information for download.


  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  i

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=as

### Generating file with training records

In [18]:
# start from an empty records directory: remove what a previous run left
records_dir = 'taxi-vehicle-collision-datamart-records/'
if os.path.exists(records_dir):
    shutil.rmtree(records_dir)
os.mkdir(records_dir)

In [19]:
# One JSON document per line. The context manager closes the file even when
# serializing a record fails, and `or []` covers the case where the record
# generator returned None (which it does for an empty result list).
with open('taxi-vehicle-collision-datamart-records/datamart-records', 'w') as training_records:
    for record in taxi_vehicle_collision_training_records or []:
        training_records.write(json.dumps(record) + "\n")

## NY Taxi Demand Problem

In [20]:
# Query dataset for this problem: CSV path under the user's home directory,
# loaded through a file:// URI. No search is run for it in the visible cells.
# NOTE(review): `container` is not imported in any visible cell -- presumably
# `from d3m import container`; confirm before running.
ny_taxi_demand_path = str(Path.home()) + '/projects/dataset-ranking/use-cases/data/ny-taxi-demand/' +\
       'yellow-taxi-2017-v2.csv'
ny_taxi_demand = container.Dataset.load('file://' + ny_taxi_demand_path)

## College Debt Problem

In [21]:
# Query dataset for this problem: CSV path under the user's home directory,
# loaded through a file:// URI.
# NOTE(review): `container` is not imported in any visible cell -- presumably
# `from d3m import container`; confirm before running.
college_debt_path = str(Path.home()) + '/projects/dataset-ranking/use-cases/data/college-debt/' +\
       'college-debt-v2.csv'
college_debt = container.Dataset.load('file://' + college_debt_path)

In [22]:
# preview the first rows of the query dataset
college_debt['learningData'].head()

Unnamed: 0,d3mIndex,UNITID,PCTFLOAN,PCIP16,PPTUG_EF,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,SATMTMID,SATVRMID,SATWRMID,UGDS,DEBT_EARNINGS_RATIO
0,0,12268508,,,,,,,,,,,,49
1,1,207564,0.475,0.0,0.2297,0.2953,0.0291,0.0647,0.0051,,,,2164.0,36
2,2,420024,0.8125,0.0,0.2315,0.2808,0.5665,0.0493,0.0,,,,203.0,127
3,3,164492,0.7465,0.0,0.2621,0.6518,0.1258,0.1022,0.0123,,,,1057.0,76
4,4,234085,0.4589,0.0321,0.0,0.7992,0.0607,0.0584,0.042,575.0,575.0,,1713.0,53


In [23]:
# Query Datamart with the supplied dataset. The only query variable references
# column index 1, which is `UNITID` in the preview above (index 0 is
# d3mIndex).
# NOTE(review): DatasetColumn arguments look like (resource_id, column_index)
# -- confirm against the datamart API.
cursor = client.search_with_data(
    query=datamart.DatamartQuery(
        variables=[
            datamart.TabularVariable(
                columns=[datamart.DatasetColumn('', 1)],
                relationship=datamart.ColumnRelationship.CONTAINS
            )
        ]
    ),
    supplied_data=college_debt
)

In [24]:
# drain the cursor: pages are appended until an empty page comes back
college_debt_results = []
while True:
    results = cursor.get_next_page()
    if not results:
        break
    college_debt_results.extend(results)

In [25]:
# total number of results gathered from all pages
len(college_debt_results)

188

In [26]:
# print_results(college_debt_results)

In [27]:
# per dataset id: where its data can be obtained (direct URL or local file)
college_debt_info = get_materialize_info(college_debt_results)

In [28]:
# Build the training records for this problem: 'DEBT_EARNINGS_RATIO' is the
# prediction target and joined datasets are written to
# companion-datasets/college-debt.
college_debt_training_records = download_datasets_and_generate_training_records(
    college_debt_results,
    college_debt,
    college_debt_path,
    'DEBT_EARNINGS_RATIO',
    college_debt_info,
    'companion-datasets/college-debt'
)

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (awai

  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.r

### Generating file with training records

In [29]:
# start from an empty records directory: remove what a previous run left
records_dir = 'college-debt-datamart-records/'
if os.path.exists(records_dir):
    shutil.rmtree(records_dir)
os.mkdir(records_dir)

In [30]:
# One JSON document per line. The context manager closes the file even when
# serializing a record fails, and `or []` covers the case where the record
# generator returned None (which it does for an empty result list).
with open('college-debt-datamart-records/datamart-records', 'w') as training_records:
    for record in college_debt_training_records or []:
        training_records.write(json.dumps(record) + "\n")

## Poverty Estimation Problem

In [31]:
# Query dataset for this problem: CSV path under the user's home directory,
# loaded through a file:// URI.
# NOTE(review): `container` is not imported in any visible cell -- presumably
# `from d3m import container`; confirm before running.
poverty_estimation_path = str(Path.home()) + '/projects/dataset-ranking/use-cases/data/poverty-estimation/' +\
       'poverty-estimation-v2.csv'
poverty_estimation = container.Dataset.load('file://' + poverty_estimation_path)

In [32]:
# preview the first rows of the query dataset
poverty_estimation['learningData'].head()

Unnamed: 0,d3mIndex,FIPS,POVALL_2016,Rural-urban_Continuum Code_2003,Rural-urban_Continuum Code_2013,Urban_Influence_Code_2003,Urban_Influence_Code_2013,Economic_typology_2015,RESIDUAL_2016,R_birth_2011,...,R_DOMESTIC_MIG_2013,R_DOMESTIC_MIG_2014,R_DOMESTIC_MIG_2015,R_DOMESTIC_MIG_2016,R_NET_MIG_2011,R_NET_MIG_2012,R_NET_MIG_2013,R_NET_MIG_2014,R_NET_MIG_2015,R_NET_MIG_2016
0,0,35005,13974,5.0,5.0,8.0,8.0,0.0,-1.0,14.5,...,-2.1,-7.6,-5.9,-7.6,-4.3,-3.0,-0.6,-7.2,-4.2,-6.8
1,1,13297,11385,1.0,1.0,1.0,1.0,0.0,1.0,12.9,...,8.2,12.7,6.6,13.5,4.0,-0.1,8.9,13.2,7.2,14.4
2,2,13137,6500,6.0,6.0,5.0,5.0,3.0,-2.0,12.0,...,-7.1,6.9,3.0,6.0,-3.1,5.7,-6.8,7.2,4.1,7.0
3,3,54017,1460,9.0,9.0,8.0,8.0,2.0,2.0,8.0,...,31.0,-7.8,29.0,-13.9,7.7,8.5,30.8,-8.0,28.9,-14.0
4,4,55055,7618,4.0,4.0,3.0,3.0,3.0,-2.0,11.2,...,-0.4,-6.4,-1.2,-2.4,-2.3,2.8,0.2,-5.8,-0.6,-1.8


In [33]:
# Query Datamart with the supplied dataset. The only query variable references
# column index 1, which is `FIPS` in the preview above (index 0 is d3mIndex).
# NOTE(review): DatasetColumn arguments look like (resource_id, column_index)
# -- confirm against the datamart API.
cursor = client.search_with_data(
    query=datamart.DatamartQuery(
        variables=[
            datamart.TabularVariable(
                columns=[datamart.DatasetColumn('', 1)],
                relationship=datamart.ColumnRelationship.CONTAINS
            )
        ]
    ),
    supplied_data=poverty_estimation
)

In [34]:
# drain the cursor: pages are appended until an empty page comes back
poverty_estimation_results = []
while True:
    results = cursor.get_next_page()
    if not results:
        break
    poverty_estimation_results.extend(results)

In [35]:
# total number of results gathered from all pages
len(poverty_estimation_results)

5000

In [36]:
# print_results(poverty_estimation_results)

In [37]:
# per dataset id: where its data can be obtained (direct URL or local file)
poverty_estimation_info = get_materialize_info(poverty_estimation_results)

In [44]:
# Build the training records for this problem: 'POVALL_2016' is the
# prediction target and joined datasets are written to
# companion-datasets/poverty-estimation.
poverty_estimation_training_records = download_datasets_and_generate_training_records(
    poverty_estimation_results,
    poverty_estimation,
    poverty_estimation_path,
    'POVALL_2016',
    poverty_estimation_info,
    'companion-datasets/poverty-estimation'
)

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, re

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (awai

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 I

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (awai

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.r

Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  a

  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, re

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, re

  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
Error from DataMart: 500 Internal Server Error
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, re

### Generating file with training records

In [45]:
# start from an empty records directory: remove what a previous run left
records_dir = 'poverty-estimation-datamart-records/'
if os.path.exists(records_dir):
    shutil.rmtree(records_dir)
os.mkdir(records_dir)

In [46]:
# One JSON document per line. The context manager closes the file even when
# serializing a record fails, and `or []` covers the case where the record
# generator returned None (which it does for an empty result list).
with open('poverty-estimation-datamart-records/datamart-records', 'w') as training_records:
    for record in poverty_estimation_training_records or []:
        training_records.write(json.dumps(record) + "\n")