## Libraries

In [1]:
import pandas as pd
import numpy as np


In [65]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

In [3]:
import keras.backend as K
from keras.layers import Dense, Dropout
from keras.models import Model, Sequential
from keras.utils.np_utils import to_categorical

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Dataset

In [4]:
# Load the DataSet into memory using pandas read_csv() function
df = pd.read_csv('CSV Data/updated-deal-details-infomation.csv')

# head() to display TOP 5 rows
df.head()

Unnamed: 0,Deal_ID,Title,Category,Event,Location,Budget,Gathering,Event_Date,Deal_Date,Artists_Pitched,Artists_Requested,Artists_Requested_URL,status,lost_reason,Source
0,9619,Bhupendra shah deal,7.0,4.0,"Jaipur, Rajasthan, India",25000.0,350.0,18-02-2018 00:00,02-01-2018 04:09,", anurag-dixit, frequency-project, peprico, ku...",Karawan,karawan,lost,Old CRM,31.0
1,9275,Mohamed majid deal,3.0,10.0,"Nungambakkam, Chennai, Tamil Nadu, India",5000.0,30.0,24-02-2018 00:00,13-02-2018 13:37,", chennai-comedy",Alexander Babu,alexander-babu,lost,Old CRM,31.0
2,1489,Sarthak Shah,3.0,2.0,"Gandhinagar, Gujarat, India",250000.0,500.0,06-02-2018 00:00,03-09-2018 03:02,", gaurav-kapoor, jagdish-chaturvedi, rajneesh-...",Abhishek Upmanyu,abhishek-upmanyu,open,,
3,9713,Ayush Thomas deal,12.0,1.0,"Dwaraka Marg, Block E, Bhagwati Garden, Nawada...",1000000.0,2000.0,24-02-2018 00:00,29-01-2018 06:27,", guru-randhawa",Guru Randhawa,guru-randhawa,lost,Old CRM,31.0
4,4435,ranjan tyagi deal,1.0,4.0,"Ranchi, Jharkhand, India",75000.0,200.0,06-02-2018 00:00,23-04-2018 13:21,", mini-dewan",,,lost,No Response,32.0


In [6]:
# Convert Event_Date to pandas datetime & fetch event month
df['Event_Date'] = pd.to_datetime(df['Event_Date'], errors = 'coerce')
df['Event_Month'] = df['Event_Date'].dt.month

In [7]:
# Subset DataFrame for Singers, Comedians & Live Band
subset_df = df[(df.Category == 12) | (df.Category == 3) | (df.Category == 7)].copy()

In [8]:
subset_df.head()

Unnamed: 0,Deal_ID,Title,Category,Event,Location,Budget,Gathering,Event_Date,Deal_Date,Artists_Pitched,Artists_Requested,Artists_Requested_URL,status,lost_reason,Source,Event_Month
0,9619,Bhupendra shah deal,7.0,4.0,"Jaipur, Rajasthan, India",25000.0,350.0,2018-02-18,02-01-2018 04:09,", anurag-dixit, frequency-project, peprico, ku...",Karawan,karawan,lost,Old CRM,31.0,2.0
1,9275,Mohamed majid deal,3.0,10.0,"Nungambakkam, Chennai, Tamil Nadu, India",5000.0,30.0,2018-02-24,13-02-2018 13:37,", chennai-comedy",Alexander Babu,alexander-babu,lost,Old CRM,31.0,2.0
2,1489,Sarthak Shah,3.0,2.0,"Gandhinagar, Gujarat, India",250000.0,500.0,2018-06-02,03-09-2018 03:02,", gaurav-kapoor, jagdish-chaturvedi, rajneesh-...",Abhishek Upmanyu,abhishek-upmanyu,open,,,6.0
3,9713,Ayush Thomas deal,12.0,1.0,"Dwaraka Marg, Block E, Bhagwati Garden, Nawada...",1000000.0,2000.0,2018-02-24,29-01-2018 06:27,", guru-randhawa",Guru Randhawa,guru-randhawa,lost,Old CRM,31.0,2.0
7,16934,Kshitij deal,7.0,1.0,"Gurugram, Haryana, India",15000.0,150.0,2016-09-16,09-09-2016 19:09,"6ix-mhz-25276, fitoor-the-band, haze, ruhani, ...",Aanch,aanch,lost,Old CRM,31.0,9.0


## Keras Feature Vector Model

### Helper Functions

In [9]:
# Initialize the Standard Scaler
SS = StandardScaler()

# Initialize KNN
nbrs = NearestNeighbors(n_neighbors = 5, algorithm = 'ball_tree')

In [10]:
## Helper Function for Keras Models

def getModel(output_dim):
    K.clear_session()

    model = Sequential()
    model.add(Dense(16, activation = 'relu', input_shape = (4,)))
    model.add(Dense(32, activation = 'relu'))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dense(128, activation = 'relu'))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dense(32, activation = 'relu'))
    model.add(Dense(output_dim, activation = 'softmax'))

    model.compile(loss = 'categorical_crossentropy', optimizer = 'rmsprop', metrics = ['acc'])
    
    return model

def getSubsetModel(tempModel, layer_name):
    return Model(inputs = tempModel.input, 
                 outputs = tempModel.get_layer(layer_name).output)

In [11]:
# Helper Function to get Input and Output

def getFeatLabel(datafr, features, label):
    ## Features(Input) 
    X = datafr[features].values
    ## & Label(Output)
    y = category_df[label].values

    ## Convert the Lables to categorical form
    y = to_categorical(y)
    
    # Return X & y i.e Features & Label
    return (X, y)

In [121]:
# Helper Function

# GET the Feature Vector
def getKey(index_val, datafr):
    return datafr[(datafr.index == index_val)].iloc[0]['FeatureVector'].reshape(1, -1)

# GET a row from deals data frame
def returnDeal(index, pd_series):
    idx = pd_series[index]
    
    return df[df.Deal_ID == idx]

### Category as Output Variable

#### Features & Label

In [12]:
category_df = subset_df[['Deal_ID', 'Category', 'Event', 'Budget', 'Gathering', 'Event_Month']].copy()
category_df.dropna(inplace = True)

In [13]:
# Create a Mapping
## Singer (12) => 0
## Comedian (3) => 1
## Live Band (7) => 2

category_df.loc[category_df.Category == 12, 'Category'] = 0
category_df.loc[category_df.Category == 3, 'Category'] = 1
category_df.loc[category_df.Category == 7, 'Category'] = 2

In [14]:
# Shape of Features and Labels
cat_X, cat_y = getFeatLabel(category_df, 
                    features = ['Event', 'Budget', 'Gathering', 'Event_Month'], 
                    label = ['Category'])
cat_X.shape, cat_y.shape

((8921, 4), (8921, 3))

#### Normal Model
Passing Features (Input) without Scaling.

In [16]:
# GET the model to fit data
category_model = getModel(output_dim = 3)

## Fit the Model
category_model.fit(cat_X, cat_y, batch_size = 32, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x27ae64c4748>

#### Scaled Model
Passing Features (Input) after performing Scaling.

In [17]:
# Transform the Data
cat_Xtran = SS.fit_transform(cat_X)

In [36]:
# GET the model to fit data & Fit 
category_model = getModel(3)
category_model.fit(cat_Xtran, cat_y, batch_size = 32, epochs = 20, validation_split = 0.1)

Train on 8028 samples, validate on 893 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x27ae9dfac88>

In [49]:
category_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 16)                80        
_________________________________________________________________
dense_2 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_3 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_4 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_5 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_6 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 99        
Total para

In [47]:
category_model.evaluate(cat_X, cat_y)



[8.662060601683747, 0.46228001353158343]

#### KNN
Using second last layer of the Scaled Model to get Feature Vector for an Deal. Then, passing it to KNN Algorithm.

In [None]:
# Select the Subset Model from the Scale Model
subset_model = getSubsetModel('dense_6')

In [62]:
# Fit the StandardScaler
_cat_X = SS.fit(cat_X)

# Create a Empty List to Store all the Feature Vectors
feature_vectors = list()

# for each deal in the event_df dataset
for i in range(category_df.index.values.shape[0]):
    # GET the index value
    idx = category_df.index.values[i]
    
    # Select the features & scale (transform).
    x = category_df.loc[idx, ['Event', 'Budget', 'Gathering', 'Event_Month']]
    x = _cat_X.transform(x.reshape(1, -1))
    
    # GET the feature vector.
    x = x.reshape(1, -1)
    feat = subset_model.predict(x).ravel()
    
    # Append it to the feature_vector List
    feature_vectors.append((event_df.loc[idx, 'Deal_ID'], feat))

  import sys


In [67]:
# Create a dataframe for the feature_vector list
featureVec_df = pd.DataFrame(feature_vectors, columns = ['Deal_ID', 'FeatureVector'])
featureVec_df.head()

Unnamed: 0,Deal_ID,FeatureVector
0,9619,"[0.0041830316, 0.46768874, 0.0, 0.0, 0.0, 0.0,..."
1,9275,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,1489,"[0.4039299, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
3,9713,"[0.25081688, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0..."
4,16934,"[0.1280865, 0.30308962, 0.0, 0.0, 0.0, 0.0, 0...."


In [88]:
# Mapping of Deal_ID to Index value in Feature Vector DataFrame
indx2dealID = pd.Series(featureVec_df['Deal_ID'], index =featureVec_df.index)

In [94]:
# Select the Feature Vectors & fi the KNN Algorithm
X = list(featureVec_df['FeatureVector'])
nbrs.fit(X)

NearestNeighbors(algorithm='ball_tree', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=5, p=2, radius=1.0)

In [101]:
# GET key i.e Input Feature Vector
key = getKey(4, featureVec_df)

# Pass Key to the kneighbors() function to get the Top 5 Neighbors with Distance & Indices
nbrs.kneighbors(key)

In [103]:
# Function to return a rows from the Deal Data Frame based on the index value
returnDeal(5)

Unnamed: 0,Deal_ID,Title,Category,Event,Location,Budget,Gathering,Event_Date,Deal_Date,Artists_Pitched,Artists_Requested,Artists_Requested_URL,status,lost_reason,Source,Event_Month
8,16935,Anant deal,7.0,1.0,"Greater Noida, Uttar Pradesh, India",10000.0,600.0,2016-09-17,09-09-2016 06:30,"6ix-mhz-25276, fitoor-the-band, haze, ruhani, ...",Ikrah,ikrah,lost,Old CRM,31.0,9.0


In [104]:
returnDeal(8650)

Unnamed: 0,Deal_ID,Title,Category,Event,Location,Budget,Gathering,Event_Date,Deal_Date,Artists_Pitched,Artists_Requested,Artists_Requested_URL,status,lost_reason,Source,Event_Month
14372,18090,Vinay Prasad deal,3.0,1.0,"Bengaluru, Karnataka, India",10000.0,900.0,2017-09-16,08-06-2017 11:02,,,,lost,Old CRM,32.0,9.0


#### END NOTE

### Event as Output Variable

#### Features & Labels

In [20]:
event_df = subset_df[['Deal_ID', 'Category', 'Budget', 'Gathering', 'Event_Month', 'Event']].copy()
event_df.dropna(inplace = True)

In [22]:
# Create a Mapping
event_df['Event'] = event_df['Event'].apply(lambda x: x - 1)

In [30]:
# Shape of Features and Labels
event_X, event_y = getFeatLabel(event_df, 
                    features = ['Category', 'Budget', 'Gathering', 'Event_Month'], 
                    label = ['Event'])
event_X.shape, event_y.shape

((8921, 4), (8921, 15))

#### Normal Model
Passing Features without Scaling 

In [31]:
event_model = getModel(15)
event_model.fit(event_X, event_y, batch_size = 32, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x27ae85e4978>

#### Scaled Model
Passing Features after Scaling.

In [32]:
event_Xtrans = SS.fit_transform(event_X)

In [154]:
event_model = getModel(15)
event_model.fit(event_Xtrans, event_y, batch_size = 32, epochs = 20)

In [155]:
event_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 16)                80        
_________________________________________________________________
dense_2 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_3 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_4 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_5 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_6 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_7 (Dense)              (None, 15)                495       
Total para

#### KNN
Using second last layer of the Scaled Model to get Feature Vector for an Deal. Then, passing it to KNN Algorithm.

In [134]:
# Select the Subset Model from the Scale Model
sub_model = getSubsetModel(event_model, 'dense_6')

In [113]:
# Fit the StandardScaler
_cat_X = SS.fit(event_X)

# Create a Empty List to Store all the Feature Vectors
feature_vectors = list()

# for each deal in the event_df dataset
for i in range(event_df.index.values.shape[0]):
    # GET the index value
    idx = event_df.index.values[i]
    
    # Select the features & scale (transform).
    x = event_df.loc[idx, ['Category', 'Budget', 'Gathering', 'Event_Month']]
    x = _cat_X.transform(x.reshape(1, -1))
    
    # GET the feature vector.
    x = x.reshape(1, -1)
    feat = sub_model.predict(x).ravel()
    
    # Append it to the feature_vector List
    feature_vectors.append((event_df.loc[idx, 'Deal_ID'], feat))

  if __name__ == '__main__':


In [114]:
# Create a dataframe for the feature_vector list
featureVec_df = pd.DataFrame(feature_vectors, columns = ['Deal_ID', 'FeatureVector'])
featureVec_df.head()

Unnamed: 0,Deal_ID,FeatureVector
0,9619,"[0.12421417, 0.21962073, 0.0, 0.20236364, 0.0,..."
1,9275,"[0.13208781, 0.14412951, 0.0, 0.10764038, 0.0,..."
2,1489,"[0.07700503, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0..."
3,9713,"[0.0, 0.20337734, 0.0, 0.6346768, 0.0, 0.09311..."
4,16934,"[0.20890461, 0.0, 0.0, 0.3500466, 0.0, 0.01232..."


In [140]:
featureVec_df.loc[10:20, :]

Unnamed: 0,Deal_ID,FeatureVector
10,11486,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.28120857, 0.0, 0.0..."
11,3734,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.39888817, 0.0, 0.0..."
12,14561,"[0.12087242, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0..."
13,9438,"[0.26366344, 0.20904714, 0.0, 0.20816803, 0.0,..."
14,17591,"[0.12421663, 0.21962178, 0.0, 0.20236333, 0.0,..."
15,18727,"[0.38943547, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0..."
16,922,"[0.21908273, 0.0, 0.0, 0.43872043, 0.0, 0.0, 0..."
17,972,"[0.13998075, 0.2077116, 0.0, 0.5102943, 0.0, 0..."
18,3255,"[0.0, 0.0, 0.0, 0.19550443, 0.0, 0.08660318, 0..."
19,7051,"[0.0, 0.0, 0.0, 0.33441928, 0.0, 0.1129024, 0...."


In [118]:
# Mapping of Deal_ID to Index value in Feature Vector DataFrame
indx2dealID = pd.Series(featureVec_df['Deal_ID'], index = featureVec_df.index)

In [119]:
# Select the Feature Vectors & fi the KNN Algorithm
X = list(featureVec_df['FeatureVector'])
nbrs.fit(X)

NearestNeighbors(algorithm='ball_tree', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=5, p=2, radius=1.0)

In [141]:
# GET key i.e Input Feature Vector
key = getKey(16, featureVec_df)

# Pass Key to the kneighbors() function to get the Top 5 Neighbors with Distance & Indices
nbrs.kneighbors(key)

(array([[0., 0., 0., 0., 0.]]),
 array([[4149,  306,   16, 4144, 4248]], dtype=int64))

In [142]:
# Function to return a rows from the Deal Data Frame based on the index value
returnDeal(16, indx2dealID)

Unnamed: 0,Deal_ID,Title,Category,Event,Location,Budget,Gathering,Event_Date,Deal_Date,Artists_Pitched,Artists_Requested,Artists_Requested_URL,status,lost_reason,Source,Event_Month
23,922,Karan Bhatia deal,12.0,14.0,"Delhi, India",50000.0,300.0,2018-04-14,18-02-2018 20:12,abazz,Akhil,akhil,lost,client don't want to book any artist,31.0,4.0


In [149]:
returnDeal(306, indx2dealID)

Unnamed: 0,Deal_ID,Title,Category,Event,Location,Budget,Gathering,Event_Date,Deal_Date,Artists_Pitched,Artists_Requested,Artists_Requested_URL,status,lost_reason,Source,Event_Month
455,916,KKGUPTA deal,12.0,12.0,"Delhi, India",50000.0,300.0,2018-04-20,18-02-2018 13:54,gitanjali-rai,Gitanjali Rai,gitanjali-rai,lost,Just Quote,31.0,4.0


#### END NOTE

## END