-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Philip Abbet <philip.abbet@idiap.ch>
- Loading branch information
0 parents
commit daa5111
Showing
60 changed files
with
1,270,589 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
############################################################################ | ||
|
||
Emotion-Based Recommendation Generator (EMORec v1.0) | ||
|
||
############################################################################ | ||
|
||
|
||
README: | ||
======= | ||
A Python library which performs emotion-based analysis and recommendation using a | ||
multiple-instance regression algorithm for a set of multimedia items described by | ||
transcripts. The algorithm is trained over 1200 TED talks using the original human- | ||
made transcripts and the corresponding community emotion labels. The library can be | ||
used in command line or directly in a Python program. It takes as input a JSON file | ||
which contains an array of dictionaries that describe the metadata of multimedia items | ||
and generates an output JSON file which contains the same items augmented with the | ||
following attributes: | ||
|
||
emotion_classes The class names of 12 TED community emotion labels | ||
emotion_scores Estimated values for 12 TED community emotion labels | ||
emotion_rec Recommended items based on these emotions | ||
emotion_rec_scores Confidence of the recommended item | ||
emotion_segments Textual segments that were used | ||
text The actual textual content of the segment | ||
start_time Starting time of the segment | ||
end_time Ending time of the segment | ||
relevance_scores Relevance which reveals the contribution of the segment | ||
to the prediction of the 12 emotion dimensions. | ||
|
||
FILES: | ||
====== | ||
The library contains the following files: | ||
|
||
ap_weights.py APWeights estimator: multiple-instance regression with learned instance weights | ||
crls.py Vector space class supporting TF-IDF, LSI, RP and LDA | ||
generate.py Main class responsible for generating recommendations | ||
data/ Data to be used for training | ||
models/ Pre-trained regression models on TED for emotion prediction | ||
parameters/ Optimal values obtained from cross-validation to be used | ||
for training and prediction | ||
|
||
|
||
USAGE: | ||
====== | ||
USAGE: python generate.py --input=<path> --output=<path> | ||
--input Path location of the input file in JSON format | ||
--output Path location of the output file in JSON format | ||
|
||
EXAMPLE: | ||
======== | ||
$ python generate.py --input=input.json --output=output.json --debug | ||
{'--debug': True, | ||
'--display': False, | ||
'--help': False, | ||
'--input': 'input.json', | ||
'--output': 'output.json', | ||
'--version': False} | ||
[+] Loading items:....................................[OK] | ||
[+] Modeling emotions: | ||
-> Unconvincing...............................[OK] | ||
-> Fascinating................................[OK] | ||
-> Persuasive.................................[OK] | ||
-> Ingenious..................................[OK] | ||
-> Longwinded.................................[OK] | ||
-> Funny......................................[OK] | ||
-> Inspiring..................................[OK] | ||
-> Jaw-dropping...............................[OK] | ||
-> Courageous.................................[OK] | ||
-> Beautiful..................................[OK] | ||
-> Confusing..................................[OK] | ||
-> Obnoxious..................................[OK] | ||
[+] Generating recommendations........................[OK] | ||
[+] Saving to output file.............................[OK] | ||
[x] Finished. | ||
|
||
DEPENDENCIES: | ||
============ | ||
1) Install python: http://www.python.org/getit/ | ||
2) Install pip: http://www.pip-installer.org/en/latest/installing.html | ||
3) Then: | ||
$ pip install docopt | ||
$ pip install json | ||
$ pip install pyyaml | ||
$ pip install numpy | ||
$ pip install scipy | ||
$ pip install gensim | ||
$ pip install nltk | ||
$ python | ||
>>> import nltk | ||
>>> nltk.download() | ||
|
||
TROUBLESHOOTING: | ||
================ | ||
Q: How can I use the library with items stored in other formats than JSON? | ||
A: You have to convert your file to JSON. | ||
Q: How can I use the library directly inside a Python program? | ||
A: Simply import the library in Python and initialize a generator object with | ||
the item dictionary of your preference. | ||
Q: Is there any attribute that is required to be present in the item metadata? | ||
A: Yes, the 'id' attribute is mandatory. | ||
|
||
CONTACT: | ||
======== | ||
Nikolaos Pappas | ||
Idiap Research Institute | ||
Centre du Parc, | ||
CH 1920 Martigny, | ||
Switzerland | ||
E-mail: nikolaos.pappas@idiap.ch | ||
Website: http://people.idiap.ch/npappas/ | ||
|
||
--- | ||
Last update: | ||
8 Jul, 2014 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
# Copyright (c) 2014 Idiap Research Institute, http://www.idiap.ch/ | ||
# Written by Nikolaos Pappas <nikolaos.pappas@idiap.ch>, | ||
# | ||
# This file is part of EMORec. | ||
# | ||
# EMORec is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License version 3 as | ||
# published by the Free Software Foundation. | ||
# | ||
# EMORec is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with EMORec. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
import numpy as np | ||
from scipy import sparse | ||
from scipy import linalg | ||
from numpy import dot, matrix | ||
from sklearn import linear_model | ||
from numpy.linalg import norm | ||
from scipy.sparse import lil_matrix | ||
from sklearn.base import BaseEstimator | ||
from constrained_rls import cRLS | ||
from sklearn.metrics import mean_absolute_error | ||
|
||
|
||
class APWeights(BaseEstimator):
    """Multiple-instance regressor with learned per-instance weights.

    Each training example is a "bag": a sparse matrix whose rows are
    instances and whose columns are features.  Training alternates between

    * ``f1`` -- a ``cRLS`` model fitted per bag; its ``coef_`` is used as the
      weight of every instance in that bag, and
    * ``f2`` -- a ``Ridge`` regressor fitted on the weighted bag
      representations ``H``.

    After the alternation stops, ``f3`` (another ``Ridge``) is fitted to map
    instance features to the learned instance weights so that weights can be
    estimated for unseen bags in :meth:`predict`.

    Attributes set by :meth:`fit`: ``f1``, ``f2``, ``f3``, ``coef_`` (the
    coefficients of ``f2``), ``P`` (per-bag values returned by ``cRLS.fit``)
    and ``H`` (bag representations).  :meth:`predict` sets ``P_test``.
    """

    def __init__(self, iterations, l1=.5, l2=.5, l3=10, reg=None):
        """Store the hyper-parameters.

        iterations -- maximum number of alternating optimisation rounds
        l1         -- ``alpha`` passed to the per-bag ``cRLS`` model (f1)
        l2         -- ``alpha`` of the bag-level ``Ridge`` model (f2)
        l3         -- ``alpha`` of the instance-weight ``Ridge`` model (f3)
        reg        -- not used by this class; kept for interface compatibility
        """
        self.iterations = iterations
        self.l1 = l1
        self.l2 = l2
        self.l3 = l3
        # Every constructor argument needs a same-named attribute, otherwise
        # BaseEstimator.get_params() / clone() / repr() cannot introspect
        # the estimator.
        self.reg = reg
        self.f1 = None
        self.f2 = None
        self.f3 = None

    def fit(self, X, Y):
        """Train the model on a collection of bags.

        X -- sequence of scipy sparse matrices, one per bag, all with the
             same number of columns (features)
        Y -- one target per bag, indexable by bag position

        Returns the randomly initialised hyperplane ``F`` that seeds the
        first round.  NOTE(review): ``F`` is never updated afterwards; the
        learned coefficients are in ``self.coef_``.  The return value is kept
        unchanged for backward compatibility.
        """
        M = X[0].get_shape()[1]        # number of features
        N = len(X)                     # number of bags
        F = np.random.ranf((1, M))     # random hyperplane seeding round 0
        H = matrix(np.zeros((N, M)))   # bag representations
        P = []                         # per-bag output of cRLS.fit
        Y_w = []                       # per-bag instance weights (f3 targets)
        converged = False
        # Infinity guarantees the first round can never look "converged",
        # whatever the scale of the targets.
        prev_error = float("inf")
        it = 0
        print("-" * 100)

        print("L1: %f" % self.l1)
        print("L2: %f" % self.l2)
        print("L3: %f" % self.l3)
        print("M: %d" % M)
        print("N: %d" % N)

        print("[+] Training...")
        while not converged and it < self.iterations:
            for i, Xi in enumerate(X):
                if it == 0:
                    # First round: start every bag with all-ones values.
                    P.append(np.ones((1, Xi.get_shape()[0])))
                    Y_w.append([])
                Xi = Xi.tocsr()
                # Score the instances with the current bag-level model, or
                # with the random hyperplane before any model exists.
                # NOTE(review): self.f2 survives between fit() calls, so a
                # second fit() starts from the previous model -- confirm
                # this warm start is intended.
                if self.f2 is not None:
                    HC = matrix(self.f2.predict(Xi)).T
                else:
                    HC = Xi.dot(F.T).T
                self.f1 = cRLS(alpha=self.l1)
                P[i] = self.f1.fit(HC, Y[i], P[i])
                Y_w[i] = self.f1.coef_
                # Bag representation = instance weights times instance matrix.
                cur_p = sparse.csr_matrix(self.f1.coef_)
                H[i] = cur_p.dot(Xi).todense()

            self.f2 = linear_model.Ridge(alpha=self.l2)
            self.f2.fit(H, Y)
            pred = self.f2.predict(H)
            cur_error = mean_absolute_error(pred, Y)
            print("iteration %d -> (MAE: %f) " % (it, cur_error))
            self.coef_ = self.f2.coef_
            # Stop once the training MAE no longer improves noticeably
            # (this also triggers when the error gets worse).
            if prev_error - cur_error < 0.000001:
                converged = True
            prev_error = cur_error
            it += 1

        Y_w = np.hstack(Y_w)
        # Stack all instances once, in bag order, so that row k of X_w lines
        # up with entry k of Y_w.
        X_w = sparse.vstack(list(X))
        print("Training f3...")
        self.f3 = linear_model.Ridge(alpha=self.l3)
        self.f3.fit(X_w, Y_w)

        self.P = P
        self.H = H
        print("--/end")

        return F

    def predict(self, X):
        """Predict one target per bag.

        X -- sequence of scipy sparse matrices, one per bag, with the same
             number of columns as the training bags

        Instance weights are estimated with ``f3``, each bag is collapsed to
        a single weighted vector, and ``f2`` is applied to those vectors.
        The normalised weights of every bag are stored in ``self.P_test``.
        """
        M = X[0].get_shape()[1]
        N = len(X)
        H = matrix(np.zeros((N, M)))
        W = []
        for i, instances in enumerate(X):
            Xi = instances
            weights = matrix(self.f3.predict(Xi)).view(np.ndarray)[0]
            nweights = weights / sum(weights)
            W.append(nweights)
            # NOTE(review): the bag vector is built from the raw weights,
            # while the normalised ones are only stored in P_test.  Confirm
            # whether `nweights` was intended here; left unchanged so that
            # predictions from existing models stay the same.
            H[i] = dot(weights, Xi.todense())[0]
        Y = self.f2.predict(H)
        self.P_test = W
        return Y
|
Oops, something went wrong.