# Imports

In [126]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
events = pd.read_csv("6sense/events.csv")
calls = pd.read_csv("6sense/calls.csv")
companies1 = pd.read_csv("6sense/companies.csv")
people = pd.read_csv("6sense/people.csv")
opportunities = pd.read_csv("6sense/opportunities.csv")
events = events.rename(columns={"date": "event_date"})
calls = calls.rename(columns={"timestamp": "call_time"})
del calls['date']

# Data Preprocessing

In [147]:
companies_people = pd.merge(companies1, people, on='company_id')
companies_people_calls = pd.merge(companies_people, calls, on='contact_id')
companies_people_calls_opp = pd.merge(companies_people_calls, opportunities, on='company_id', how='outer')
opportunity_created = companies_people_calls_opp['created_date'].notnull()
opportunity_label = opportunity_created.astype(int)
merged = pd.merge(companies_people_calls_opp, events, on='contact_id')
del merged['company_id']
del merged['activity_name']
del merged['contact_id']
del merged['created_date']
merged['event_date'] =  pd.to_datetime(merged['event_date'])
merged['event_day'] = pd.Series([e.strftime('%a') for e in merged['event_date']]).astype('category').cat.codes
merged['event_month'] = merged['event_date'].dt.strftime('%b').astype('category').cat.codes
del merged['event_date']
merged['call_time'] =  pd.to_datetime(merged['call_time'])
merged['call_day'] = pd.Series([e.strftime('%a') for e in merged['call_time']]).astype('category').cat.codes
merged['call_month'] = merged['call_time'].dt.strftime('%b').astype('category').cat.codes
merged['call_hour'] = merged['call_time'].dt.hour
del merged['call_time']
merged['industry'] = merged['industry'].replace(np.nan, 'UNKNOWN', regex=True).astype('category').cat.codes
merged['employee_range'] = merged['employee_range'].replace(np.nan, '1,000 - 4,999', regex=True).astype('category').cat.codes
merged['job_level'] = merged['job_level'].astype('category').cat.codes
merged['job_function'] = merged['job_function'].astype('category').cat.codes
merged['activity_type'] = merged['activity_type'].astype('category').cat.codes
merged['activity_action'] = merged['activity_action'].astype('category').cat.codes
merged['call_disposition'] = merged['call_disposition'].astype('category').cat.codes

merged

Unnamed: 0,industry,employee_range,job_level,job_function,call_disposition,activity_action,activity_type,event_day,event_month,call_day,call_month,call_hour
0,14,1,1,9,3,4,3,6,10,4,8,17
1,14,1,1,9,3,5,5,1,10,4,8,17
2,14,1,1,9,3,4,3,4,11,4,8,17
3,14,1,1,9,3,4,3,1,10,4,8,17
4,14,1,1,9,3,4,3,1,10,4,8,17
...,...,...,...,...,...,...,...,...,...,...,...,...
4663,9,2,6,9,2,2,4,6,6,6,3,21
4664,9,2,6,9,2,4,3,1,0,6,3,21
4665,9,2,6,9,2,4,3,0,0,6,3,21
4666,9,2,6,9,2,5,5,6,6,6,3,21


# Training Model

## Predict Call Disposition

In [153]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
target=merged['call_disposition']
features=merged.drop(['call_disposition'],axis=1)
x_train,x_test,y_train,y_test=train_test_split(features,target,test_size=0.2,random_state=42)

model = RandomForestRegressor()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
training_accuracy = model.score(x_train, y_train)
print ('Training Accuracy:',training_accuracy)
test_accuracy = model.score(x_test, y_test)
print ('Testing Accuracy:',test_accuracy)

Training Accuracy: 0.8582734233641356
Testing Accuracy: 0.6451455500832921


## Predict Best Day of Week to Call 

In [154]:
target=merged['call_day']
features=merged.drop(['call_day'],axis=1)
x_train,x_test,y_train,y_test=train_test_split(features,target,test_size=0.2,random_state=42)

model = RandomForestRegressor()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
training_accuracy = model.score(x_train, y_train)
print ('Training Accuracy:',training_accuracy)
test_accuracy = model.score(x_test, y_test)
print ('Testing Accuracy:',test_accuracy)

Training Accuracy: 0.9689645188252691
Testing Accuracy: 0.84541092507259


## Predict  Best Hour to Call 

In [152]:
target=merged['call_hour']
features=merged.drop(['call_hour'],axis=1)
x_train,x_test,y_train,y_test=train_test_split(features,target,test_size=0.2,random_state=42)

model = RandomForestRegressor()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
training_accuracy = model.score(x_train, y_train)
print ('Training Accuracy:',training_accuracy)
test_accuracy = model.score(x_test, y_test)
print ('Testing Accuracy:',test_accuracy)

Training Accuracy: 0.9381959347707082
Testing Accuracy: 0.8786703654459017
