In [2]:

import sqlalchemy as db
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from pytz import timezone, utc
from new_api import (add_entity, create_new_entity_set, add_aggregation_features, get_training_df, get_prediction_df, 
add_entity_df, list_features)


In [3]:
# Register the three entities used by this notebook. Each spec carries a name,
# a table, a type, an index column and a time field; the exact meaning of the
# "type" values is defined by new_api (not visible here).

# Agent records: indexed by agent_id, timestamped by effective_date.
agent_entity = {
    "name": "agent",
    "table": "agent",
    "type": "primary",
    "index": "agent_id",
    "time": {"field": "effective_date", "type": "effective_date"},
}
add_entity(agent_entity)

# Acxiom records: also indexed by agent_id, timestamped by the "date" column.
acxiom_entity = {
    "name": "acxiom",
    "table": "agent_acxiom",
    "type": "primary",
    "index": "agent_id",
    "time": {"field": "date", "type": "effective_date"},
}
add_entity(acxiom_entity)

# Commission events from the agent_sales table: indexed by the event id,
# timestamped by the "date" column.
comission_events = {
    "name": "agent_commission",
    "table": "agent_sales",
    "type": "event",
    "index": "id",
    "time": {"field": "date", "type": "event"},
}
add_entity(comission_events)



In [4]:
# Each relationship is a triple: (cardinality, first entity + index column,
# second entity + index column). Both links use agent_id on either side.
relationships = [
    (
        "one_to_one",
        {"name": "agent", "index": "agent_id"},
        {"name": "acxiom", "index": "agent_id"},
    ),
    (
        "one_to_many",
        {"name": "agent", "index": "agent_id"},
        {"name": "agent_commission", "index": "agent_id"},
    ),
]

# Group the three registered entities and their links under one entity set.
create_new_entity_set(
    name="nyl_agents",
    entities=["agent", "acxiom", "agent_commission"],
    relationships=relationships,
)


In [5]:
# Aggregations to attach to the agent_commission entity. Every entry names the
# source column ("feature"), the aggregation function, the resulting feature
# name and the time window it is computed over.
agent_commission_agg_features = {
    # Sum of the amount column.
    "total_sales": {
        "feature": "amount",
        "function": "sum",
        "name": "total_sales",
        "time_window": "full_history",
    },
    # Maximum of the amount column.
    "max_sales": {
        "feature": "amount",
        "function": "max",
        "name": "max_sales",
        "time_window": "full_history",
    },
    # Count over the id column.
    "total_num_sales": {
        "feature": "id",
        "function": "count",
        "name": "total_num_sales",
        "time_window": "full_history",
    },
}
add_aggregation_features("agent_commission", agent_commission_agg_features)


In [8]:
# List the features available in the "nyl_agents" entity set and display them.
# The captured output below is a dict keyed by entity name, holding each
# entity's raw columns plus any aggregation features added to it.
available_features = list_features("nyl_agents")
available_features

{'agent': {'raw_features': ['index',
   'effective_date',
   'agent_id',
   'feature_1',
   'feature_2',
   'feature_3',
   'feature_4',
   'feature_5',
   'feature_6',
   'feature_7',
   'feature_8']},
 'acxiom': {'raw_features': ['index',
   'date',
   'agent_id',
   'zipcode',
   'num_household']},
 'agent_commission': {'raw_features': ['index',
   'id',
   'date',
   'agent_id',
   'amount',
   'feature_2',
   'feature_3'],
  'calulated_features': ['total_sales', 'max_sales', 'total_num_sales']}}

In [9]:
days = [datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0).replace(tzinfo=utc) \
        - timedelta(day * 365) for day in range(2)][::-1]

agents = [1001, 1002, 1003, 1004, 1005]


df = pd.DataFrame(
    {
        "observation_time": [day for day in days for customer in agents],
        "agent_id": [customer for day in days for customer in agents],
        "prediction": [np.random.rand()  for _ in range(len(days) * len(agents))],

    }
)


eol = {"pk": "agent_id", "observation_date": "observation_time", "label": "prediction"}

In [10]:
# Request spec for the agent-level training frame: the entity set to read,
# the target entity, and the columns to pull from each entity.
features = {
    "entity_set": "nyl_agents",
    "target_entity": "agent",
    "features": {
        "agent": ["feature_1", "feature_2", "feature_6"],
        "acxiom": ["zipcode", "num_household"],
        "agent_commission": ["total_sales", "max_sales"],
    },
    # Observations are supplied as the frame `df`, with `eol` naming its
    # primary-key, observation-date and label columns.
    "observations": {"type": "eol", "eol": eol, "data": df},
}
training_df = get_training_df(features)
training_df

Unnamed: 0,agent_id,observation_time,prediction,feature_1,feature_2,feature_6,zipcode,num_household,total_sales,max_sales
0,1001,2019-04-21,0.721943,0.480067,7.723815,8.379558,4.255309,7.496026,5342.100585,98.794306
1,1002,2019-04-21,0.106506,9.013384,9.068622,1.776475,7.075826,2.386496,5231.710173,98.79592
2,1003,2019-04-21,0.112372,7.212778,4.733295,1.301572,4.490116,2.655191,5020.385718,99.179627
3,1004,2019-04-21,0.155888,9.021748,4.541025,6.934113,2.61947,5.988528,4691.319855,98.665236
4,1005,2019-04-21,0.413855,7.832689,7.340339,3.002503,6.303029,6.421379,5127.804913,98.712913
5,1001,2020-04-20,0.314868,8.545554,2.633264,9.333365,8.531021,4.476037,8140.666356,99.128721
6,1002,2020-04-20,0.187019,2.701212,6.357321,8.831064,4.189019,5.275873,8420.646085,98.79592
7,1003,2020-04-20,0.976671,7.073583,7.422053,4.454242,6.963907,1.391084,7420.977796,99.517803
8,1004,2020-04-20,0.289052,5.611286,1.986844,6.344534,5.525038,5.737543,7368.291179,98.665236
9,1005,2020-04-20,0.715793,4.003701,9.816454,2.429394,6.021134,0.470115,7970.363264,98.712913


In [11]:
# Build the prediction frame from the same `features` request used for training.
# The captured output below has one row per agent and carries only the feature
# columns (no observation_time or prediction column).
prediction_df = get_prediction_df(features)
prediction_df


Unnamed: 0,agent_id,feature_1,feature_2,feature_6,zipcode,num_household,total_sales,max_sales
0,1001,8.545554,2.633264,9.333365,8.531021,4.476037,8140.666356,99.128721
1,1002,2.701212,6.357321,8.831064,4.189019,5.275873,8420.646085,98.79592
2,1003,7.073583,7.422053,4.454242,6.963907,1.391084,7420.977796,99.517803
3,1004,5.611286,1.986844,6.344534,5.525038,5.737543,7368.291179,98.665236
4,1005,4.003701,9.816454,2.429394,6.021134,0.470115,7970.363264,98.712913


In [None]:
# Event-level training request: rows are targeted at the agent_commission
# entity instead of the agent entity, and observations are of type "event"
# rather than a supplied frame.
# NOTE(review): this cell rebinds `features` and `training_df` from the
# agent-level cells above; re-run those cells before reusing their results.
features = {
    "entity_set": "nyl_agents",
    # Must match the entity name registered with add_entity and listed in the
    # entity set ("agent_commission"); the previous spelling matched no entity.
    "target_entity": "agent_commission",
    "features": {
        "agent": ["feature_1", "feature_2", "feature_6"],
        "acxiom": ["zipcode", "num_household"],
        # Raw event columns plus the aggregation features added earlier.
        "agent_commission": ["amount", "date", "feature_2", "feature_3", "total_sales", "max_sales"],
    },
    "observations": {"type": "event"},
}
training_df = get_training_df(features)
training_df