# Deep Learning Model
Using Embedding Matrix method

In [1]:
import pandas as pd
import numpy as np

In [31]:
from keras.layers import Embedding, Reshape, dot, Dense
from keras.models import Sequential
from keras.optimizers import Adamax
import keras.backend as K

In [87]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
# Load the Dataset into the Memory
df = pd.read_csv('CSV Data/Deal-Artists-Pitched.csv')
df.head()

Unnamed: 0,dealid,artistsid,pitched
0,5871,982,0
1,6414,982,0
2,3255,982,0
3,6438,982,0
4,6480,982,0


In [33]:
df.drop_duplicates().shape

(50540, 3)

In [125]:
num_deals = len(df['dealid'].unique())
num_artists = len(df['artistsid'].unique())

In [126]:
num_deals, num_artists

(76, 665)

In [10]:
latent_factors = 128

In [117]:
dealid_map = dict((idx, i) for i, idx in enumerate(df['dealid'].astype('category').cat.categories))
artistid_map = dict((idx, i) for i, idx in enumerate(df['artistsid'].astype('category').cat.categories))

In [122]:
idx2dealid = dict((i, idx) for idx, i in dealid_map.items())
idx2artistid = dict((i, idx) for idx, i in artistid_map.items())

## MODEL

In [243]:
deal_train = df['dealid'].map(dealid_map).values
artist_train = df['artistsid'].map(artistid_map).values

y_train = df['pitched'].values

In [244]:
deal_train.shape, artist_train.shape, y_train.shape

((50540,), (50540,), (50540,))

In [237]:
K.clear_session()

# Deal Embedding Matrix
deal_model = Sequential()

deal_model.add(Embedding(num_deals+1, latent_factors, input_length = 1))
deal_model.add(Reshape(target_shape = (latent_factors,)))

# Artists Embedding Matrix
artist_model = Sequential()

artist_model.add(Embedding(num_artists+1, latent_factors, input_length = 1))
artist_model.add(Reshape(target_shape = (latent_factors,)))

# Merge the model
model = Sequential()
model.add(Merge([deal_model, artist_model], mode = 'dot'))
model.add(Dense(128, activation = 'relu'))
model.add(Dense(64, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))



In [238]:
deal_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1, 128)            9856      
_________________________________________________________________
reshape_1 (Reshape)          (None, 128)               0         
Total params: 9,856
Trainable params: 9,856
Non-trainable params: 0
_________________________________________________________________


In [239]:
artist_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 1, 128)            85248     
_________________________________________________________________
reshape_2 (Reshape)          (None, 128)               0         
Total params: 85,248
Trainable params: 85,248
Non-trainable params: 0
_________________________________________________________________


In [240]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
merge_1 (Merge)              (None, 1)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               256       
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 105,729
Trainable params: 105,729
Non-trainable params: 0
_________________________________________________________________


In [250]:
model.compile(loss = 'binary_crossentropy', optimizer = 'adamax')

In [251]:
model.fit([deal_train, artist_train], y_train, epochs = 5, validation_split = 0.1)

Train on 45486 samples, validate on 5054 samples
Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 

In [246]:
y_pred = model.predict(x = [deal_train, artist_train])

print ("RMSE: {}".format(np.sqrt(mean_squared_error(y_train, y_pred))))
print ("MAE: {}".format(mean_absolute_error(y_train, y_pred)))

RMSE: 0.08464037194700556
MAE: 0.004095408715174045


In [168]:
# Using Sigmoid as the Activation for Last Layer and after 10 Epochs 
## Loss: MSE and Optimizer: Adam

y_pred = model.predict(x = [deal_train, artist_train])

print ("RMSE: {}".format(np.sqrt(mean_squared_error(y_train, y_pred))))
print ("MAE: {}".format(mean_absolute_error(y_train, y_pred)))

RMSE: 0.17820116833389407
MAE: 0.053409676745430276


In [247]:
def get_prob(DealID, ArtistID):
    
    X = [np.array(dealid_map[DealID]).reshape(-1, 1), np.array(artistid_map[ArtistID]).reshape(-1, 1)]
    
    return model.predict(X)

In [249]:
dealidx = 6556

print (f"Deal ID: {dealidx}\n")
for idx in df[(df['dealid'] == dealidx) & (df['pitched'] == 0)]['artistsid'].values:
    print (f"Artists ID: {idx} - Probability: {get_prob(5871, 94)}")

Deal ID: 6556

Artists ID: 982 - Probability: [[1.]]
Artists ID: 973 - Probability: [[1.]]
Artists ID: 971 - Probability: [[1.]]
Artists ID: 965 - Probability: [[1.]]
Artists ID: 963 - Probability: [[1.]]
Artists ID: 948 - Probability: [[1.]]
Artists ID: 94 - Probability: [[1.]]
Artists ID: 938 - Probability: [[1.]]
Artists ID: 934 - Probability: [[1.]]
Artists ID: 90 - Probability: [[1.]]
Artists ID: 890 - Probability: [[1.]]
Artists ID: 883 - Probability: [[1.]]
Artists ID: 88 - Probability: [[1.]]
Artists ID: 772 - Probability: [[1.]]
Artists ID: 75892 - Probability: [[1.]]
Artists ID: 75862 - Probability: [[1.]]
Artists ID: 75705 - Probability: [[1.]]
Artists ID: 750 - Probability: [[1.]]
Artists ID: 741 - Probability: [[1.]]
Artists ID: 734 - Probability: [[1.]]
Artists ID: 704 - Probability: [[1.]]
Artists ID: 682 - Probability: [[1.]]
Artists ID: 650 - Probability: [[1.]]
Artists ID: 613 - Probability: [[1.]]
Artists ID: 597 - Probability: [[1.]]
Artists ID: 578 - Probability: [

Artists ID: 236232 - Probability: [[1.]]
Artists ID: 236218 - Probability: [[1.]]
Artists ID: 236212 - Probability: [[1.]]
Artists ID: 236195 - Probability: [[1.]]
Artists ID: 236192 - Probability: [[1.]]
Artists ID: 236061 - Probability: [[1.]]
Artists ID: 235982 - Probability: [[1.]]
Artists ID: 235981 - Probability: [[1.]]
Artists ID: 235949 - Probability: [[1.]]
Artists ID: 235761 - Probability: [[1.]]
Artists ID: 235746 - Probability: [[1.]]
Artists ID: 235499 - Probability: [[1.]]
Artists ID: 235492 - Probability: [[1.]]
Artists ID: 235489 - Probability: [[1.]]
Artists ID: 235258 - Probability: [[1.]]
Artists ID: 235065 - Probability: [[1.]]
Artists ID: 235058 - Probability: [[1.]]
Artists ID: 235050 - Probability: [[1.]]
Artists ID: 235030 - Probability: [[1.]]
Artists ID: 235 - Probability: [[1.]]
Artists ID: 234965 - Probability: [[1.]]
Artists ID: 234954 - Probability: [[1.]]
Artists ID: 234945 - Probability: [[1.]]
Artists ID: 234937 - Probability: [[1.]]
Artists ID: 234889 

Artists ID: 147238 - Probability: [[1.]]
Artists ID: 147191 - Probability: [[1.]]
Artists ID: 147149 - Probability: [[1.]]
Artists ID: 147128 - Probability: [[1.]]
Artists ID: 147116 - Probability: [[1.]]
Artists ID: 147099 - Probability: [[1.]]
Artists ID: 147097 - Probability: [[1.]]
Artists ID: 147022 - Probability: [[1.]]
Artists ID: 1470 - Probability: [[1.]]
Artists ID: 147 - Probability: [[1.]]
Artists ID: 146 - Probability: [[1.]]
Artists ID: 144 - Probability: [[1.]]
Artists ID: 137006 - Probability: [[1.]]
Artists ID: 136882 - Probability: [[1.]]
Artists ID: 136853 - Probability: [[1.]]
Artists ID: 136839 - Probability: [[1.]]
Artists ID: 136818 - Probability: [[1.]]
Artists ID: 136785 - Probability: [[1.]]
Artists ID: 136757 - Probability: [[1.]]
Artists ID: 136746 - Probability: [[1.]]
Artists ID: 136744 - Probability: [[1.]]
Artists ID: 136743 - Probability: [[1.]]
Artists ID: 136737 - Probability: [[1.]]
Artists ID: 136736 - Probability: [[1.]]
Artists ID: 136726 - Probab