Set up HDFS and Google credentials

In [1]:
sc


In [2]:
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="./imdb-e9e7ce7a779d.json"
os.environ["HDFSCLI_CONFIG"]="./.hdfscli.cfg"
os.environ["HADOOP_CONF_DIR"]="/opt/hadoop-3.1.0/etc/hadoop"
sc.environment["GOOGLE_APPLICATION_CREDENTIALS"]="/MovieScope-1bf4856cc738.json"

List filenames of reviews from HDFS and parallelize in preparation from processing

Parallelise the reviews and use Google NLP API to extract entities and related sentiment.

In [3]:
# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

import pyspark.sql.functions as F
from pyspark.sql.window import Window as W
from pyspark.sql.types import *
from pyspark.sql import Row
import pyspark.sql.functions as functions
from pyspark.sql.functions import collect_list
from pyspark.sql.functions import collect_set
from pyspark.sql.functions import udf

#from pyspark.mllib.linalg import SparseVector, DenseVector, VectorUDT
from pyspark.ml.linalg import SparseVector, DenseVector, VectorUDT
from pyspark.ml.classification import NaiveBayes, NaiveBayesModel, RandomForestClassifier, RandomForestClassificationModel
from pyspark.ml.feature import CountVectorizer, CountVectorizerModel, HashingTF, IDF, IDFModel, StringIndexer, StringIndexerModel, IndexToString
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

from functools import reduce
import re
import numpy as np
from math import exp
import pickle
import pandas as pd
import base64

from spacy.lemmatizer import Lemmatizer
from spacy.lang.en import LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES

In [4]:
def collectEntities(x, y):
    # The first reduce call doesn't pass a list for x, so we need to check for that.
    if not isinstance(x, list):
        x=[x]
        

    xd = dict(x)
    #print(xd)
    
    if not isinstance(y, list):
        y = [y]
        
    for ye in y:
        if ye[0] in xd:
            try:
                xd[ye[0]] = (xd[ye[0]]+ye[1])/2
            except:
                Null
        else:
            xd[ye[0]] = ye[1]
    
    return [o for o in xd.items()]
        

In [5]:
orientation = "pos"
collection="reviews"
urlsCollection="train"

Load genre information from file (previously collected using IMDB API)

In [6]:


def decodeGenre(x):
    try: 
        g = pickle.loads(base64.b64decode(x[2:-1]), encoding="bytes") 
        if (len(g)==0):
            return ["NA"]
        else:
            return g
    except:
        return ["NA"]    
        

def loadGenres(urlsCollection, orientation):
    genres = pd.read_csv("Data/genres_"+urlsCollection+"_urls_"+orientation+".csv", sep="\t", index_col=0, usecols=[1, 2, 3])
    genres = genres.fillna(value="b''")
    genres["GENRE"] = genres["GENRE"].apply(decodeGenre) 

    schema = StructType([
        StructField("FILM_ID", IntegerType(), True),
        StructField("GENRE", ArrayType(StringType(), containsNull=True), True)])

    genres_df = spark.createDataFrame(genres, schema)

    from pyspark.sql.functions import monotonically_increasing_id

    # This will return a new DF with all the columns + id
    genres_df = genres_df.withColumn("ID_TEMP", monotonically_increasing_id())#.limit(10)

    genres_df = genres_df.withColumn("ID",F.row_number().over(W.orderBy("ID_TEMP"))).select(["FILM_ID", "GENRE", "ID"])#.limit(10)
    
    return genres_df


In [8]:
#from pyspark.mllib.linalg import SparseVector, DenseVector, VectorUDT
from pyspark.ml.linalg import SparseVector, DenseVector, VectorUDT
from pyspark.sql.functions import udf

def sparse2dense(sp):
    return DenseVector(sp)




    
def separateGenres3(rec):
    print(rec)
    return [[genre, e, s] for (e, s) in rec.ENTITY_SENTIMENT for genre in rec.GENRE]

def prepareDataset(collection, orientation, urls):
    # Make sure we don't trigger Google Cloud API again
    entity_documents_info = spark.read.parquet("hdfs://spark-master:8020/user/lmrd/"+collection+"/"+orientation+"_doc_info2.pq")
    print("emtity_documents_info")
    entity_documents_info.show(5)

    genres_df = loadGenres(urls, orientation)
    print("genres_df")
    genres_df.show(5)
    
    entity_documents_info = entity_documents_info.alias("df1").join(genres_df.alias("df2"), entity_documents_info.ID == genres_df.ID)#.select(["df1.*", "df2.FILM_ID", "df2.GENRE"])
    print("entity_documents_info")
    entity_documents_info.show(5)
    
    grouped_entities = entity_documents_info.rdd.flatMap(separateGenres3)
    grouped_entities.repartition(5)
    print("grouped_entities")
    print(grouped_entities.take(5))
    
    grouped_entities_df = spark.createDataFrame(data=grouped_entities, schema=["genre", "entity", "sentiment"])
    grouped_entities_df.cache()
    print("grouped_entites_df")
    grouped_entities_df.show()
    
    grouped_entity_words = grouped_entities_df.select(["genre", "entity"]).groupBy("genre").agg(collect_list("entity").alias("entities"))
    print("grouped_entity_words")
    grouped_entity_words.show(5)
    
    return grouped_entity_words

def prepareDataset2(collection, orientation, urls):
    grouped_entities_df = spark.read.parquet("hdfs://spark-master:8020/user/lmrd/"+urlsCollection+"_"+orientation+"_grouped_entities2.pq")
    
    grouped_entity_words = grouped_entities_df.select(["genre", "entity"]).groupBy("genre").agg(collect_list("entity").alias("entities"))
    grouped_entity_words.show(5)
    
    return grouped_entity_words
    
def extractTFIDFDataframeAndModel(collection, orientation, urls):

    grouped_entity_words = prepareDataset2(collection, orientation, urls)
    
    # Create the dictionary
    countVec = CountVectorizer(inputCol="entities", outputCol="tf")
    #countVec = HashingTF(numFeatures=1024, inputCol="entities", outputCol="tf")
    idf = IDF(inputCol="tf", outputCol="tfidf")
    si = StringIndexer(inputCol="genre", outputCol="genreId")#, handleInvalid="keep")
    nb = NaiveBayes(featuresCol="tfidf", labelCol="genreId", predictionCol="predictGenreId")
    #rf = RandomForestClassifier(featuresCol="tfidf", labelCol="genreId", predictionCol="predictGenreId")
    #isModel = IndexToString(inputCol=nb.getPredictionCol(), outputCol="predictGenre")
    
#    grouped_entity_words = si.fit(grouped_entity_words).transform(grouped_entity_words)
    pipeline = Pipeline(stages=[countVec, idf, si, nb])
    #pipeline = Pipeline(stages=[countVec, idf, si, rf])
    
    
#    paramGrid = ParamGridBuilder() \
#        .addGrid(countVec.minTF, [1.0]) \
#        .addGrid(countVec.minDF, [1.0]) \
#        .build()
    
#    crossval = CrossValidator(estimator=pipeline,
#                  estimatorParamMaps=paramGrid,
#                  evaluator=MulticlassClassificationEvaluator(predictionCol="predictGenreId", labelCol="genreId", metricName="accuracy"),
#                  numFolds=3, parallelism=5)  # use 3+ folds in practice
    
#    cvModel = crossval.fit(grouped_entity_words)
    cvModel = pipeline.fit(grouped_entity_words)
    
    dft = cvModel.transform(grouped_entity_words)
    
    eval = MulticlassClassificationEvaluator(predictionCol="predictGenreId", labelCol="genreId")
    print("f1-score: ", eval.evaluate(dft))
    
    
    return cvModel

cvModel = extractTFIDFDataframeAndModel(collection, orientation, urlsCollection)

#(tfidf, cvmodel, idf, siModel, isModel) = extractTFIDFDataframe(collection, orientation, urlsCollection)
#tfidf.show(5)


+---------+--------------------+
|    genre|            entities|
+---------+--------------------+
|    Crime|[irwin yablan, is...|
|  Romance|[prince valiant, ...|
| Thriller|[abandonment, abb...|
|Adventure|[terminology, ter...|
|       NA|[achievement, anc...|
+---------+--------------------+
only showing top 5 rows

f1-score:  0.9999999999999996


In [81]:
f1-score:  0.9080459770114939


[0.0]

In [12]:


def checkSentimentValue(x):
    try:
        f = float(x)
        
        return f
    
    except:
        print("Wrong sentiment value ", f)
        return 0
    
def extractEntitiesSetimentForReview(review_contents):
    # Instantiates a client
    client = language.LanguageServiceClient()
        
    document = types.Document(content = review_contents, 
                             type=enums.Document.Type.PLAIN_TEXT, language="en-US")
    tries=1
    
    while tries < 5:
        try:
            entities = client.analyze_entity_sentiment(document=document, encoding_type="UTF8")
            break
        except:
            f = open("/home/etienne/sparklog.txt", mode="a")

            f.write(""+str(entities)+"\n")
            f.close()
            time.sleep(1)
            
            tries +=1
            
    
    
    # Make sure we have no duplicate entities. If we do, average their sentiment.
    justLetters = re.compile("[^a-z ]")
    response = [o for o in zip([lemmatizer(justLetters.sub("", entity.name.lower()), u"NOUN")[0] for entity in entities.entities], 
                               [checkSentimentValue(entity.sentiment.score) * checkSentimentValue(entity.sentiment.magnitude) 
                                    for entity in entities.entities])]
    
    
#    response = sorted(response, key=lambda x: x[0])
#    if (len(response)>1):
#        response = reduce(collectEntities, response)
    
            
    return response

In [15]:
def indexToLabel(cvModel, indexes):
    return [cvModel.bestModel.stages[2].labels[index] for index in indexes]

def indexToLabel2(cvModel, indexes):
    return [cvModel.stages[2].labels[index] for index in indexes]

indexToLabel2(cvModel, [0])
#pipeline = cvModel.bestModel.explainParams()  #getEstimator()
#siModel = pipeline.getStages()[2]

#nb = NaiveBayes(featuresCol="tfidf", labelCol="genreId", predictionCol="predictGenreId")

#nb_model = nb.fit(tfidf2)

#print(nb_model.pi)


['Animation']

In [9]:
# Evaluate the model
ds = prepareDataset2("test_reviews", orientation, "test")

ds.show(5)

+---------+--------------------+
|    genre|            entities|
+---------+--------------------+
|    Crime|[irwin yablan, is...|
|  Romance|[human behaviour,...|
| Thriller|[abandonment, abb...|
|Adventure|[terminology, ter...|
|       NA|[achievement, anc...|
+---------+--------------------+
only showing top 5 rows

+---------+--------------------+
|    genre|            entities|
+---------+--------------------+
|    Crime|[adventure fictio...|
|  Romance|[prince valiant, ...|
| Thriller|[abandonment, abb...|
|Adventure|[a bugs life like...|
|       NA|[achievement, anc...|
+---------+--------------------+
only showing top 5 rows



In [10]:
testpreds = cvModel.transform(ds)
#testpreds.take(1)[0].tf
testpreds.show()
eval = MulticlassClassificationEvaluator(predictionCol="predictGenreId", labelCol="genreId")
print("f1-score: ", eval.evaluate(testpreds))

+-----------+--------------------+--------------------+--------------------+-------+--------------------+--------------------+--------------+
|      genre|            entities|                  tf|               tfidf|genreId|       rawPrediction|         probability|predictGenreId|
+-----------+--------------------+--------------------+--------------------+-------+--------------------+--------------------+--------------+
|      Crime|[adventure fictio...|(11068,[0,1,2,3,4...|(11068,[0,1,2,3,4...|   26.0|[-34817.805468736...|[0.0,0.0,0.0,0.0,...|          26.0|
|    Romance|[prince valiant, ...|(11068,[0,1,2,3,5...|(11068,[0,1,2,3,5...|   12.0|[-39251.119157738...|[0.0,0.0,0.0,0.0,...|          12.0|
|   Thriller|[impacting, impal...|(11068,[0,1,2,3,4...|(11068,[0,1,2,3,4...|    2.0|[-40805.489888920...|[0.0,0.0,1.0,0.0,...|           2.0|
|  Adventure|[a bugs life like...|(11068,[0,1,2,3,4...|(11068,[0,1,2,3,4...|   25.0|[-22377.138669431...|[0.0,0.0,0.0,0.0,...|          25.0|
|     

In [20]:


#revText=["I went and saw this movie last night after being coaxed to by a few friends of mine. I'll admit that I was reluctant to see it because from what I knew of Ashton Kutcher he was only able to do comedy. I was wrong. Kutcher played the character of Jake Fischer very well, and Kevin Costner played Ben Randall with such professionalism. The sign of a good movie is that it can toy with our emotions. This one did exactly that. The entire theater (which was sold out) was overcome by laughter during the first half of the movie, and were moved to tears during the second half. While exiting the theater I not only saw many women in tears, but many full grown men as well, trying desperately not to let anyone see them crying. This movie was great, and I suggest that you go see it before you judge."]
#revText=["A revelation BrentCarleton13 July 2007 Though it has somehow or other managed to escape all the standard reference books this film is a real and unheralded discoverya visually distinguished and absorbing Gothic thriller halfway between Bava country and the Mexican gothics like The Witchs Mirror  Blancheville Monster where have you been hiding Right off the bat we are treated to a deliciously evocative visual openinga wintry bare tree ridden country road awash with an icy looking rain beyond which stands the distant castle on the mountain  Ann Radcliffe would be well pleased And the film keeps on delivering two attractive female leads a mysterious and scarred man locked in a tower room frequent thunder and lightning and all the necessary accoutrements of remote castle Gothic from flickering torches to doleful turns on the family harpsichord  The countryside in which the film is shot has the desolate beauty of a November day and strongly suggests the Brittany in which the film is set though not shot In this sense some of the landscapes resemble those in Bressons Diary of a Country Priest Devotees of the genre are herewith advised to seek this out Its far more deserving than some of its overhyped relatives 21 out of 21 found this helpful Was this review helpful Yes No  Report this  710 Guide me to my tomb daddy Coventry3 July 2006 The opening sequences show both titles Horror and The Blancheville Monster neatly after each other as if the distributors couldnt really decide which of the two was better andor more appropriate They should have just called it The Blancheville Horror Problem solved Anyway that was totally irrelevant Im more and more becoming a fan of director Alberto De Martino Hes been making good horror movies consistently from the early 60s until the mid 80s yet he never received the respect and appreciation that other Italian directors did This stylish and severely underrated picture came out alongside loads of other Gothic horror movies and although not as brilliant as say Black Sunday or Kill BabyKill its a hugely atmospheric and powerfully compelling chilltale with an overall decent script and convincing set pieces Only a couple of days prior to her 21st birthday a beautiful girl travels back to her wealthy fathers castle accompanied by her new lover and best friend Upon arrival her brother informs her about the tragic incident that supposedly killed her father and how he got madly obsessed with the legend of the Blancheville family curse But the new arrivals notice that there are a lot more strange things going on Why have all the servants been replaced Why is the brother so nervous about the girls upcoming birthday And most of all who or what produces those creepy screams at night in the castles darkest tower The Blancheville Monster offers pretty much all the elements youre looking for in good Gothic horror including eerie thunderstorms scary blackcaped monsters ominous vaults and tight costumes that supply the female cast members with impressive cleavage De Martino does a great directing job spreading the suspense equally throughout the whole film and the finale  albeit not too hard to predict  is formidably tense Unlike any of Mario Bavas Gothic horror movies The Blancheville Monster will not haunt your nightmares but for fans of classic horror its definitely worth seeing 17 out of 19 found this helpful Was this review helpful Yes No  Report this Somewhat slow short on horror but eerie rixrex17 May 2006 This pretty good Cormanstyled quasiPoe entry complete with Vincent Price lookalike and red herrings is a little too plodding for its own good and needs some more frequent and stronger moments of horror to make it to the very good level The same can be said of the CormanPoe films and such yet they had great indelible scare moments fabulous art direction in color and Vincent Price to keep them interesting This looks like a late night spook show staple from the 60s and 70s and is fine as such except I saw it without commercials I kept thinking it would be better if Id watched it on an old BW 20 portable TV on a wirerack TV stand with potato chips and soda and a chair with lumpy cushion on a chilly and windy October night just to get into the period 13 out of 14 found this helpful Was this review helpful Yes No  Report this  710 Another interesting Italian horror soqueljosh18 January 2008 I sought out this little gem after reading about it in Tim Lucas bio of Mario Bava Lucas believes the effects in this 1962 bw strongly resemble Bavas technical flourishes I sought out this title for this reason but also because I collect films of the era containing the word monster in the title I also have a growing appreciation for Gothic Italian horror and mystery After snapping it up on Amazon for about five bucks including postage I am pleased to announce that it was well worth my time and effort  There is a lot to like about this movie including an eerie score and loads of captivating locations and sets the crumbling abbey is reminiscent of Universals Dracula but much more realistic and effectively photographed Moments of the film seem over the top and overcooked but it stands up well for its time overall This is especially desirable to those of us who recall the old Shock Theatre days of television and enjoy Italian Gothic horror of the 60s 7 out of 7 found this helpful Was this review helpful Yes No  Report this  610 Faux Corman but not bad at all LaughingGravy27 February 2005 I had no idea what to expect from this one but it turned out to be Italys response to the Roger Corman Edgar Allan Poe films and its actually pretty good  In late 19th century northern France lovely Emily De Blancheville returns to her ancestral home from finishing school to find that her brother has sacked the entire staff and all the new servants are creepy Worse yet her father  whom she had believed to be killed in a fire  is actually alive but hideously burned and criminally insane and locked up in the tower Her brother explains that there is a curse on the De Blancheville line and their father believes that the curse can only be broken if Emily dies before her 21st birthday which is coming up so close that theyve already got the castle bedecked with festive balloons Well to make 89 minutes short the father escapes and pretty soon Emily is in for a bad time of it  What I liked about this film Its produced by Llama Films which has to make a person smile The leading ladies are all lovely and Emilys little peekaboo nightie is extremely flattering yeah yeah I know men are pigs Oink oink The location and sets are amazing with real castles and genuine ruins its so cold even indoors that you can constantly see the actors breath The monsters makeup seems to be some kind of Kharis mask Italian style The dialog is priceless if a tad wordy You will follow me Emily To your tomb To your death To die To die To die To die Have I mentioned it was produced by Llama Films Oh and the brother Roderick what else is played by a guy who looks like Vincent Price At least in the context of this film There are a lot of other shenanigans with a doctor who is not what he appears a housekeeper who is exactly what she appears a premature burial et al but I dont like to give away too much of the plot Sometimes you have to just let the film run its course and try not to think about what it all means for months or even years 11 out of 14 found this helpful Was this review helpful Yes No  Report this  510 Occasionally atmospheric Italian Gothic RedBarracuda1 February 2010 The Blancheville Monster is a decent Italian Gothic entry Its true that it is a little plodding and uneventful however its occasionally eerie and stylish The story is about a badly disfigured count who visits his daughter by night attempting to induce her into committing suicide to revoke an old family curse Frankly the story is absolutely ridiculous I mean couldnt the count just kill his daughter rather than embark on his moonlit walks with her to the family mausoleum Well yes however these spooky encounters do actually provide the film with its best Gothic imagery These scenes set in the dead of night with the sleepwalking daughter being followed a few paces behind by the blackclad monstrous count through the ruins to the family tomb are very striking So really plot inconsistencies have to be weighed against this Although I do have a little problem with the final confrontation where the daughter escapes from her entombed situation without even a hint of how this came to be Nevertheless these observations aside The Blancheville Monster is not bad The location is well used and there is some decent photography to accentuate this If you are a fan of 60s Italian Gothic then its really one to check out 4 out of 4 found this helpful Was this review helpful Yes No  Report this  510 Mesmerizing Italian Horror Film BaronBl00d20 November 2005 A young beautiful blonde her best friend from college with her brother and an admirer of her blonde beauty reach home the stately residence of the famed and fabled Blachevilles Upon arriving there we meet the brother Roderick a scary looking housekeeper that seems to possess a station way above her ranks and a doctor that oozes something not closely related to charm It seems that all the old servants have been let go and that the patriarch  the father has recently died  or not I liked this film for several reasons If nothing else it has atmosphere The castle used is an impressive set The black and white cinematography accentuates the rooms of the castle a huge family tomb grounds swirling with fog and a lot more We have a huge organ being grinded out in a few scenes in classic horror classic fashionsay that fast five times The story and the monster are nothing fantastic at all but the film works because the artful direction is able to build suspense Now if you are the type of viewer that needs a lot of action then you really must stay away as this film is more talking and all red herringsin what turns out to be a not all that clever mystery of who is the Blancheville monster There is some rather preposterous story about an ancient curse on the family that will die off when the last female descendant turns 21  utter rubbish But it is incidental when one looks at the way the film was filmed and the pace slowly creeps up from slower to slow and then finally fast at the end in the revelation of what has lain cloaked the whole time This was an Italian production with lots of Spanish influencemuch of the cast is Spanish The actors are all decent and able to make you believe in them at times This is by no means a great horror film but it as other reviewers have noted is in the same vein as a Roger Corman movie of the same period or a black and white Hammer film This is the Euro version with no big stars but a decent story and lots of mood 6 out of 7 found this helpful Was this review helpful Yes No  Report this  610 Not a Bad Scary Film biggertuna16 January 2005 A list of the neat elements of the film aka  Horror  aka The Blanchville Monster1 Its Italian 2 black and white 3 Horror sneakily appears as an occult film but really a psychological horror film and 4 the ultracreepy background music I watched this movie when I was a 10year old watching it on shock theater late Saturday night There are a few boring parts and some parts that should never been put into the film such as Rodrigues playing the keyboards But Im a little prejudiced on bragging on this movie because Ive never seen an Italian horror film I didnt like  Black Sunday Black Sabbath Suspiria Inferno etc  4 out of 5 found this helpful Was this review helpful Yes No  Report this  710 Gothic horror brings on genuine frights markwaltz8 March 2016 Warning Spoilers There are no vampires witches or ghosts but the horror in this genuinely creepy Italian grande guignol are impossible to deny Better than many others of the same themes this is still equally chilling only missing the presence of Barbara Steele even though there is a role in this that seems to have been written for her  Every element that makes films of this type so intriguing is there and while it is said to have been influenced by the stories of Edgar Allan Poe there is enough in it to make it seem fresh and filled with its own ideas An old family curse is said to be out to kill the daughter of a recently deceased count Allegedly her surviving past the age of 21 will end their reign so her father is rumored to still be alive and determined to kill her   While it was pretty obvious to me what was going on it is fun watching everything unfold The blonde girls are the heroines while the beautiful brunette housekeeper is assumed to be evil This is one worth rewatching a rare quality of the many films in this genre which are often too laughable to believe 2 out of 2 found this helpful Was this review helpful Yes No  Report this  310 Death by exposition Andy McGregor29 March 2014 Warning Spoilers 2 out of 3 found this helpful Was this review helpful Yes No  Report this        310 The Blandville Monster vegeta398625 September 2009 Warning Spoilers 2 out of 3 found this helpful Was this review helpful Yes No  Report this        510 Slow moving Italian Gothic heavily influenced by the CormanPoe cycle Leofwinedraca25 November 2015 Italian cinema has a long history of ripping off successful movies and this ItalianSpanish coproduction is no exception Cashing in on the success of Roger Cormans Edgar Allan Poe movies with Vincent Price this movie  purporting to be from Poe himself  relies heavily on the plot ingredients and atmosphere found in the Corman flicks The setting is an old dark castle the plot involves genetic madness and disfiguration and everything that goes on is steeped in mystery and suspense Theres even a supporting character the doctor whos been made up to look a lot like Vincent price Sadly as with most ripoffs THE BLANCHEVILLE MONSTER is an inferior product and it lacks the genuine originality found in other Italian Gothic movies from the same period CASTLE OF BLOOD TERROR OF DR HICHCOCK BLACK Sunday are just a few I could name Thats why youll almost never hear this film mentioned in the same breath as the others The main problem is that BLANCHEVILLE tries too hard and the stodgy script doesnt help When it tries to be scary it ends up being boring and there just isnt enough of the slim storyline to pad out a whole movie even with the crews best efforts  The film does boast some fine moments and these are mainly down to director Albert De Martino a mainstay of the genre for a good 2030 years Scenes of the heroine being pursued through a dead wood by a deformed killer are superbly creepy and the whole buried alive aspect of the plot is handled effectively  its just a shame it takes an hour and ten minutes to get there The cast cant be faulted either with a very good turn from Gerard Tichy in the Vincent Price role  the sinister older brother who has dark secrets of his own Okay so Ombretta Colli isnt much of an actress but shes pretty and in a visual film like this that counts for something I have to say that I preferred Helga Line HORROR EXPRESS here appearing in an early role as a scheming villainess With good blackandwhite visuals and some great scare scenes THE BLANCHEVILLE MONSTER could have been up there with the other films of this period Instead its a merely adequate flick that might well be just too damned slow for modern tastes 1 out of 1 found this helpful Was this review helpful Yes No  Report this  710 I Never Heard Of It But Im Happy I Gave It A Chance AllNewSux15 August 2012 This film and a lot of the cast were unfamiliar to me and Im a horror fanatic so I assume that most of you never heard of it It is reminiscent of other Gothic horror films but I say that in the best of terms The settings are cold and sterile which adds to the creepiness It starts off a little slow with Emily returning from school but its kind of cool because everyone else is acting so weird and you only know as much as she does Keeping the viewer in the dark as to what is going on puts you into the lead characters position Then when you think the strangeness has been explained suddenly the cast starts acting odd again You are never put at ease for the heroines safety and everything about the ending was pretty surprising Suspenseful horror movies and old Twilight Zone episodes can be fun to watch when you dont know why everyone is acting so strange You and perhaps the lead character are the only ones who arent in on the joke or riddle or curse in the case of the Blancheville Monster 1 out of 1 found this helpful Was this review helpful Yes No  Report this  410 Not so Enchanting Hitchcoc11 January 2007 Warning Spoilers 2 out of 4 found this helpful Was this review helpful Yes No  Report this  510 Horror interesting enough to keep your attention mike196411 August 2001 This 1962 import is a semigothic haunted house movie that rates about average Story is about a college girl who travels back to her home castle with her best friend and best friends brother When she arrives she finds the staff has completely changed Her brother tells her that their father died in a fire The bother looks rather sinister and frequently plays moody songs on the piano  We are introduced to a strange doctor and a beautiful but odd housekeeper We finally find out that the girls father is not dead just horribly mutilated from the fire We are told the father is deranged and believes in the old family curse which says that the daughter must die before age 21  The father escapes and then most of the rest of the movie throws suspicion onto all the other cast members as the daughter walks around in a trance Pretty eerie throughout and in the end the daughter is buried alive I wont spoil the ending but youll probably guess whats going on before the climax of the film 2 out of 4 found this helpful Was this review helpful Yes No  Report this  310 Unexpected Plot Julian R White13 September 2017 Warning Spoilers 0 out of 0 found this helpful Was this review helpful Yes No  Report this  710 Atmospheric slowmoving but perfectly watchable Italian Gothic Prichards1234518 August 2017 Warning Spoilers 0 out of 0 found this helpful Was this review helpful Yes No  Report this      810 A Good Gothic Italian Horror  My Style of Horror Film Rainey Dawn19 October 2016 A very atmospheric Gothic styled horror  its right up my alleyway This this is the kind of horror that sends shivers up and down my spine madness twisting plot fog big old castle revenge murder coffins cobwebs creaking noises etc its subtle horror mixed with a bit of a mystery that kept me highly entertained To me the film had the feeling or look of a 60s BW TV Show or made for TV movie rather than something that played on the large screen at the picture shows  yet that seemed to add to the film rather than take away from it OH expect to hear things like hes wanting to murder you followed by the Dadumb music like the old TV mystery or horror shows  its not just the look  overall feel of the film but the music that adds to the TV show feeling when watching  Very good movie I recommend to fans of Hammer Amicus and other classic horror film studios and to those that like the older mysteries  810 0 out of 0 found this helpful Was this review helpful Yes No  Report this  710 Fun atmospheric horror film preppy319 June 2014 This is a dubbed Italian horror film that takes place in 1884 France Emily has just come home to the huge spooky family castle after finishing school With her are her fianc John Taylor and his sister Alice At home she discovers her father has died and her brother is in charge with all new servants and a suspicious doctor Then there are screams in the night a disfigured monster roaming about and a family curse that threatens Emily  It doesnt make a whole lot of sense but it works This is they type of horror film they used to show late at night on local TV stations Theres no nudity sex or swearing and very minor blood and no gore It takes place in a beautiful and very eerie castle drenched with atmosphere The black and white photography only helps and there are even a few dark and stormy nights thrown in This isnt really scary but it is lots of fun The kind of horror movie to watch late at night with the lights off and some popcorn handy I give it a 7 0 out of 0 found this helpful Was this review helpful Yes No  Report this  510 Your worst nightmare Uncle Fester bkoganbing16 May 2011 The Blancheville Monster adapted from an Edgar Allan Poe story would have needed the touch of Vincent Price to truly make this a classic As it is the film is like a home town group theater production of a the same story  Its an Italian production of a British couple with an American brother and sister visiting a sinister French chteau in Brittany Cant get more international than that but Poe liked to vary his story locations he was not a chauvinistic American just as Jules Verne was not chauvinistic Frenchman  The younger sister of the Marquess of Blancheville is the subject of some frightening experiences because a family curse has said one female in the family must die before the age of 21 to insure long life for the rest More than that I cant say but these things do take on a life of their own at times  Although the producers didnt mean it as the Addams Family was a few years in the future it didnt help to have one of the living nightmares of the dead father of the surviving Blancheville brother and sister look like Uncle Fester  Its not a bad film but I do wish these things were exclusively the property of Vincent Price 0 out of 0 found this helpful Was this review helpful Yes No  Report this  510 A deadringer no way catfisher27 July 2010 Warning Spoilers 0 out of 0 found this helpful Was this review helpful Yes No  Report this                310 Dont be fooled ronevickers27 January 2010 Dont be fooled by the scary and impressive opening sequence which promises much but delivers very little as this film proceeds In so many ways this is a typical Italian horror effort of its time The performances are patchy at best and the direction is hamfisted and laborious In fact the pace of the film is so slow that the viewer is impelled to almost mentally wind the thing on fast forward There are far too many pofaced lingering glances that simply serve to become quite hilarious after a time Ive rarely seen a film that constantly shows the main characters staring silently at one another One gets the odd feeling of it being a silent movie with a dubbed dialogue thrown in as an afterthought On the plus side it does have one or two interesting scenes which provide a bit of a creepy atmosphere but these are not sustained in any positive way and this is to the detriment of the whole film Fans of the genre will be severely disappointed by this mediocre effort 0 out of 0 found this helpful Was this review helpful Yes No  Report this  310 You are getting sleepy Zeegrade9 December 2009 Titling this film Horror when it supplies anything but is akin to labeling a bottle of water Fire though The Blancheville Monster did give me some wicked yawns Blond beauty Emily De Blancheville has returned from college to the family mansion along with her lovely friend Alice and her brother John whos become quite smitten by the soon to be twentyone year old Upon reuniting with her brother Rodrigue Emily begins to notice that her deceased fathers attendants have all been replaced While the guests dine that night during a thunderstorm Alice hears the distinct sound of a man howling in pain which is quickly dismissed by Rodrigues not quite so friendly maid Elenore The not so furtive glances between Rodrigue Eleonore and the Doctor you think theyre hiding something leads Alice to investigate the screams later that night only to discover gasp Emilys father is not dead Im just drenched in disbelief With the cat out of the bag Rodrigue tells Emily the truth about their father and how hes gone mad due to a curse etched onto a family gravestone what you cant erase it that predicts the end of the Blancheville family if one of the daughters lives past her twentyfirst birthday I guess a black hole opens up and sucks them in I suppose The father escapes only to return to Emilys bedside using some form of hypnotism to lure her to her own tomb Caped skulking ensues for the rest of the film Yeah  Sorry to be such a downer as most of the reviews of this movie seem to have positive things to say however Ive always been a glasshalfempty kind of guy and I grew weary of the banality of this talkheavy film To me it seemed like 87 very long minutes Its hard to build up any suspense when Rodrigue and his castle staff give you that guiltyashell vibe the moment you meet them It doesnt exactly take a sleuth to figure out whos behind all of this either The Blancheville Monster is like a kids connect the dots if there where only two dots on the page I do give it credit for the two beautiful female leads especially Emilys willowy nightgown that shows her perky breasts See I did find something positive to say Watch only if you wish to impress early film snobs or you happen to run out of Ambien 0 out of 0 found this helpful Was this review helpful Yes No  Report this  710 Everything Seems Morose And Deathlike ferbs5428 December 2011 Warning Spoilers 1 out of 2 found this helpful Was this review helpful Yes No  Report this    610 THE BLANCHEVILLE MONSTER Alberto De Martino 1963 12 MARIO GAUCI7 January 2011 This films reputation rests largely on its generic original title  HORROR which actually precedes the English moniker here interestingly the director did not Anglicize his name as was the norm for the overseas release version though later he would often bill himself Martin Herbert It is very much an imitation of AIPs contemporaneous Poe cycle with family curses premature burials and prowling maniacs galore albeit filmed in monochrome The 16mm print blown up to 35mm I watched resulted in soft visuals and often had characters heads lopped off  surprisingly however the English dubbing was not halfbad  The cast though decidedly lowkey is nonetheless effective of the three male leads Gerard Tichy as the brooding castle owner invariably called Roderick or Rodrigue depending on the pronunciation and Leo Anchoriz as the obligatory inhouse doctor come off best Their luscious distaff counterparts  also attesting to the films multilingual crew  are filled by the Italian Ombretta Colli as the returning and obviously imperiled sister Iran Eory named after the country from which she emanates and playing the heroines companion who vaguely arouses the romantic attention of the two men I mentioned earlier though this aspect of the plot is largely taken up by Collis liaison with Eorys own bland sibling and German Helga Line a EuroCult stalwart and therefore the bestknown of the lot  though perhaps muchtooyoung for the sinister housekeeper role which is another archetypal figure in this particular subgenre  The plot cowritten by brothers Bruno and Sergio Corbucci who would themselves eventually graduate to the directors chair though both preferred to steer clear from Fantasy thereafter such as it is provides no surprises This extends even to the real identity of the hooded fire victim behind the attempted fulfillment of the prophecy by which the central family is bound Much is made too of how several people are bent to the villains will through hypnosis  though only the heroine ever succumbs to sleepwalking throughout The finale incorporates an effective nightmare sequence and the mandatory bloodthirsty return from the grave routine which unfortunately is rather abruptly presented here 1 out of 2 found this helpful Was this review helpful Yes No  Report this  410 I didnt find too much going for this film Chuck Straub16 April 2004 The Blancheville Monster is as much of a mystery movie as it is a horror film It actually had more going for it as a mystery movie rather than a horror movie but even there I considered it second rate The problem with this 1964 Italian movie is that I didnt think it was too good at either The characters werent interesting and the movie seemed to drag Although this is a horror film I didnt find it very scary It was somewhat interesting but not enough for me to give it a good recommendation There are times that it was even boring To sum it all up Id have to say that I wouldnt go out of my way to watch this one There are a lot of better ones out there This should be in the lower half of the list 1 out of 3 found this helpful Was this review helpful Yes No  Report this  610 chilly for the actors not for the viewers Bezenby23 February 2012 The Blancheville Monster is of those Gothic horror deals where folks harbour a dark secret fall in love in the blink of an eye and chase each other around at night It also reminded me somewhat of a Scooby Doo episode although Im not sure why  A girl arrives home from college with her two US buddies to find that the staff at her country estate have been replaced by a maid that stares at folks a lot a butler who likes staring and a doctor who also stares a lot Her dad has sadly passed away and her brother seems to be in charge of the household and staring  However the girl and her friend keep hearing howling in the middle of the night and whos the badly scarred guy locked up in the tower And whats with all the ominous staring Loads of secrets and twists in this one but I cant but notice the lack of bite that "]
#revText=["This movie was based on the true story and it did a great job of re-creating what actually happened. The creepy way the little girl talks to the seemingly empty rocking chair and the 3:15 am horrifying nightmares over and over again would make anyone feel a little freaked out. The reaction of anyone who has anything to do with the church should have been a sign to get out of this house. The flies, cold spots, and eyes peering in and outside of the windows are all things that actually happened inside that house in Amityville Long Island. I recommend this movie to anyone who wants to enjoy a scary movie or for anyone who has been freightened of a house before. This movie does the trick without any fancy special effects."]
revText=["George and Kathy Lutz are looking for a place to anchor down and raise a family The Lutzs and their children Kathys from a previous marriage settle on an impossibly cheap large and beautiful shore house But 28 days later the macabre and scary happenings force them to leaveleaving all their earthly possession behind During the course of those 28 days the family goes through all kinds of hell a room full of flies demonic voices and a pig with glowing red eyes What kind of past does that house have that would make everything horribly wrong Based on a true story  Real or hoax you decide but The Amityville Horror has all the trappings of an excellent haunted house story Too bad that the filmmakers falter a bit with a lack of character development that also ends up stifling the actors in the film that would have helped out immensely A creepy music score the one rejected for The Exorcist and several good setpieces help out but the end of the second act kind of gets stale A good supernatural thriller 48 out of 60 found this helpful Was this review helpful Yes No  Report this  810 Yeah it has problems but I still love it Brandt Sponseller7 April 2005 George James Brolin and Kathleen Lutz Margot Kidder buy a dream house in Amityville New York for a dream price Unfortunately the price was low because just a year before the house was the location of the Ronald DeFeo Jr murdershe killed his entire family while they were sleeping As a priest Father Delaney Rod Steiger blesses the home he realizes with horror that something evil is lingering there The dream house is turning into a nightmare  Sometimes our affection for or aversion to an artwork that weve been exposed to a number of times over the years is inextricably enmeshed with our historical emotional experiences whether we admit this or not For example I strongly dislike soap operas or indeed any dramas that resemble soap operas This is probably due to the fact that for years my only exposure to soap operas was when I was home sick from school as a kid These were the days before cable television and home video In the middle of a weekday afternoon you either watched soap operas or you didnt watch television Subconsciously I associate soap operas with a feeling of illness  Likewise Jay Ansons Amityville Horror novel appeared when I was still a teen I loved it I can still remember reading it in one long sittingsomething I rarely didin the family car as we drove from Florida to Ohio to visit relatives I was excited when the film appeared and liked it a lot at the time  So although I can see many faults with Amityville Horror now I still have a deep affection for it that triggers my brain to go into an apologetic mode and defend the film I just cant bring myself to give it lower than an 8 out of 10 and even that seems low to me But I can easily see how audiences lacking a history with the film might dislike it It is relatively slow uneventful and meanderingwith a modern perspective the pacing and subtlety are reminiscent of some recent Asian horror At the same time maybe paradoxically scenery chewing has only rarely had a greater ally  Just a couple days ago MGM released newly remastered widescreen versions of Amityville 1 2 and 3 I havent seen the film look this good since seeing it in the theater in 1979 and it probably didnt even look this good then The first thing that struck me was how incredible much of the cinematography is Director Stuart Rosenberg had an amazing knack for finding intriguing angles for shots and imbuing them with beautiful colors  Unlike recent trends Rosenbergs colors are not narrowed down to a single scheme For example in some shots such as some of the interiors of the famed Amityville house we get fabulous combinations of pale greens and yellows In others such as many exterior shots near the house we get intense combinations of fall foliage colors There are also a number of beautiful shots of the famed eye window exterior of the house in differently tinted negative colors  Rosenberg evidences a great eye for placing his cast in the frame and shooting scenes to create depth and symbolism via objects that partially block or surround the frame He also has a knack for creating winding receding patterns of objects that enhance depth through perspective My affection for this aspect of the film has little nostalgic attachment as I didnt pay attention to such things as a kid I didnt start noticing them more until I started painting far into my adult years and the positive aspects of the cinematography were hardly discernible on the previous ridiculously bad pan  scan VHS release  Of course most people arent watching a film like this for the aesthetics of the visual composition This is one of the most famous haunted house films after all The horror is handled somewhat awkwardly occasionally absurdly but it still works well enough for me as understated as it is Im not referring to the acting just the horror objects Aspects such as the ubiquitous flies reminded me of similar motifs such as water in Hideo Nakatas horror films such as Ringu 1998 and Dark Water 2002 The beginning of the film showing the Defeo murders still has a lot of shock value despite its relative postTarantino tameness Most of the horror elements are more portentous but theyre regular and interesting enough to hold your attention as long as you dont mind subtlety  Subtlety however was the furthest thing from the casts minds Brolin Kidder and especially Steiger shout their lines more often than they speak them Overacting is not in their vocabularies Kidder comments on an accompanying documentary that the horror genre walks a fine line between intensity and camp That may or may not be true in general but in Amityville Horror camp is frequently broached For me it has a certain charm Im a fan of camp and so bad its good Amityvilles performances often attain both  The commentary on the new DVD is amusing given the 1970s publicity that the book and film depicted a true haunting and the subsequent thorough debunking by persons such as Stephen Kaplan Hans Holzer a parapsychologist who has been involved with the story since the early days and the author of a book upon which Amityville II was based provides the commentary He presents himself as an academic but he obviously seems to have little concern for objectivity or skepticism He not only still talks about the story as true he invents supernatural excuses for the DeFeo murders and then some barely mentioning detractors such as Kaplan  If you havent seen the film yet you should base your viewing decision on whether you have a taste for deliberately paced horror as well as a tolerance for extremely overthetop performances The film is historically important in the genre as well 77 out of 100 found this helpful Was this review helpful Yes No  Report this Bizarre Bad Badly Bizarre Bizarrely Fascinating curtis martin25 October 2004 Warning Spoilers 29 out of 35 found this helpful Was this review helpful Yes No  Report this      610 Nothing great only the eyelike windows were creepy n Brolins performance was good Fellashibby14 May 2017 Saw this on a VHS in the mid 80s Revisited it recently on a DVD To be honest i found the movie to be tedious n tame then Now i jus forwarded some boring scenes The film opens on a dark and stormy night as we hear gunshots and see flashes of light through the homes famous eyelike upstairs windows as an entire family is killed A new family moves in after a year n unsettling things begin to occur Ther are scenes where the walls drip blood Was it blood or tar i dont know Whose blood it was or where it came from was never explained Theres a hidden room in the house the dog always barks at In one scene James Brolin climbs the stairs above that room only to fall through them and into a pit of the same bloodtar Was that suppose to be comedic Also the scene involving Rod Steiger with the flies wasnt scary at all The movie was boring considering the length n nothing happens Somewhere around 0116 Josh Brolin breaks open a wall n his facial expressions n eyes r epic scene man What he sees that makes him so startled we never come to know n we dont get to see also The film is helped by an extremely creepy score composed by Lalo Schifrin n Brolin delivered a good performance Margot Kidder did a decent job 12 out of 13 found this helpful Was this review helpful Yes No  Report this  910 Dated but great seanahalpin17 February 2002 True the special effects arent so special these days True the girl with the braces brings tears of laughter rather than terror But nonetheless this movie remains a creepy gem from my young days Everyone misses the point that the real star of the movie is the house The building is both attractive and sinister  truly gothic in the importance of the setting Whether the story is true or not if you want a movie to snuggle on the couch in the dark eating popcorn feeling the thrill of a ghost story scare this is one for you 58 out of 76 found this helpful Was this review helpful Yes No  Report this  810 Get outindeed SmileysWorld26 April 2002 I was but a timid lad of 14 when taken to a drivein theater to see this incredibly effective horror filmThere is no better monster to create a film around than the Devil himselfWhen Rod Steigers characterFather Delaney is in the process of blessing the houseand was greeted by a resounding shout of Get OutI almost took it literallyit was that effectiveThe Devil is indeed one unwelcome houseguest that is very hard to kick outas you will see when you watch this filmJames Brolin and Margo Kidder head a young family who are the new inhabitants of a home where brutal murders had taken place years beforeSoonstrange happenings begin to haunt the familyas the house has trouble letting go of what had happened thereThis movie is definitely in my top 10 horror films that I have seenand if you enjoy being scared out of your witsthis film will do it for youGive it a look 47 out of 62 found this helpful Was this review helpful Yes No  Report this  810 OLD FASHIONED HORROR richard cavellero16 December 2004 Excited about the remake I decided to go out and just but the original Amityville Horror Being a huge horror buff I just had to and besides I had only seen some of its absurd sequels hearing mixed reviews from friends and critics from terrifying to hilarious I turned it on with my boyfriend at the time and prepared myself for something scary I must say that I was quite impressed And although slightly disappointed in some of the films scenes ultimately I must say this is one old fashioned scary flick I can hugely recognize the appeal it had in its its hey day With the exception of Texas chainsaw Massacre Evil Dead the Omen and some others I rarely see what people did in their horror classics nowadays Like The Exorcist pretty damn boring and funny in my opinion But getting back to this film It builds a creeping mood filled with fright inducing suspense The effects are simple but effective and the performances are somewhat over the top but necessarily wacky The films overall lasting appeal has little to do with the films apparent campiness it has more to do with the real terror inducing legend that inspired it Like the Chainsaw remake the new ones looks to amp up the horror and intensity which would be greatly welcomed Although a great horror classic Amityvilles finale is somewhat anti climactic and after a long and impressively scary buildup it fails to deliver the end goods But whoa some of the scenes from the imaginary friend Jody flying out the window to the visitor at the door to the voice in the house and just everything in the basement this film is all about delivering some authentic chills 810777"]
revTextRdd = sc.parallelize(revText)

lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES)
sc.broadcast(lemmatizer)

#entitiesForTest = revTextRdd.flatMap(extractEntitiesSetimentForReview)
entitiesForTest = revTextRdd.flatMap(extractEntitiesSetimentForReview)

print(entitiesForTest.collect())
schema1 = StructType(
                [StructField("entity", StringType(), False), 
                 StructField("sentiment", FloatType(), False)])

entitiesForTest_df = spark.createDataFrame(entitiesForTest, schema=schema1)


entitiesForTest_df.registerTempTable("df")
grouped_entities_df2 = spark.sql("select ltrim(rtrim(entity)) as tentity, avg(sentiment) as avg_sent, stddev(sentiment) as std_sent from df group by tentity having abs(avg_sent)>0.3")
#grouped_entities_df2.registerTempTable("grouped_entities_df")
#grouped_entities_df3 = spark.sql("select genre, tentity as entity, avg_sent, std_sent from grouped_entities_df where abs(avg_sent)>0.3 order by genre, entity, avg_sent desc")

grouped_entities_df3 = grouped_entities_df2.withColumn("std_sent", functions.when(functions.isnan(grouped_entities_df2.std_sent)==True, functions.abs(grouped_entities_df2.avg_sent)).otherwise(grouped_entities_df2.std_sent))


entitiesForTest2_df = grouped_entities_df3.agg(collect_set('tentity').alias('entities')).crossJoin(grouped_entities_df3.agg(collect_set("avg_sent").alias("avg_sent"), collect_set("std_sent").alias("std_sent")))




#entitiesForTest_df.show()
#entitiesForTest2_df = entitiesForTest_df.agg(collect_set('entity').alias('entities')).crossJoin(entitiesForTest_df.agg(collect_set('sentiment')).alias("sentiment"))

entitiesForTest2_df.show()



[('film', -1.4000000208616257), ('the amityville horror', 0.0), ('teen', 0.0), ('lack', -0.5199999886751172), ('house', 0.0), ('child', -0.16000000476837162), ('report', -0.10000000149011612), ('film', 0.0), ('family', -0.16000000476837162), ('room', -0.32000000953674324), ('yes no', 0.0), ('house', -0.480000026226044), ('rod steiger', -0.11000000402331356), ('filmmaker', -0.16000000476837162), ('dream house', -0.16000000476837162), ('jr murdershe', -0.08000000238418581), ('possession', -0.040000001192092904), ('shore house', 0.0), ('house', -0.16000000476837162), ('review', -0.040000001192092904), ('one', -0.09000000715255752), ('family', -0.16000000476837162), ('haunted house story', -0.010000000298023226), ('story', -0.040000001192092904), ('novel', 0.3800000008940696), ('price', -0.16000000476837162), ('everything', -0.36000002861023006), ('dream house', -0.18000001430511503), ('happening', -0.16000000476837162), ('kind', -0.25), ('place', -0.09000000715255752), ('dream price', -0.

+--------------------+--------------------+--------------------+
|            entities|            avg_sent|            std_sent|
+--------------------+--------------------+--------------------+
|[fall foliage col...|[1.45000004768371...|[0.0, 0.572756452...|
+--------------------+--------------------+--------------------+



In [21]:
entitiesForTest3_df = cvModel.transform(entitiesForTest2_df)

entitiesForTest3_df.show()

print(indexToLabel2(cvModel, [int(entitiesForTest3_df.select("predictGenreId").take(1)[0].predictGenreId)]))

scores = entitiesForTest3_df.select("probability").collect()[0].probability
print(scores)
probs=scores
#probs = [1-(1.0*s)/np.sum(scores) for s in scores]

#print(probs)
print(np.argsort(probs))
top = np.argsort(probs)[:-10:-1]
#top = np.argsort(probs)[0:3]


print(indexToLabel2(cvModel, top))

+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------+
|            entities|            avg_sent|            std_sent|                  tf|               tfidf|       rawPrediction|         probability|predictGenreId|
+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------+
|[fall foliage col...|[1.45000004768371...|[0.0, 1.450000047...|(11068,[0,33,70,1...|(11068,[0,33,70,1...|[-327.49246516239...|[0.02298414628850...|          18.0|
+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------+

['Film-Noir']
[0.02298414628850469,0.0037102992878302564,4.2566762618178346e-08,0.0023377174207513023,1.4340246936082423e-05,1.0428892442411147e-07,0.01858465688726912,0.000533127

In [47]:
print(indexToLabel2(cvModel, [int(entitiesForTest3_df.select("predictGenreId").take(1)[0].predictGenreId)]))

['News']


In [39]:


entitiesForTest3_df = cvmodel.transform(entitiesForTest2_df)

entitiesForTest3_df = entitiesForTest3_df.withColumn("features", udf_to_DenseVector("tf"))

entitiesForTest3_df = model.transform(entitiesForTest3_df)

entitiesForTest3_df = isModel.transform(entitiesForTest3_df)

print(isModel.getLabels())
scores = entitiesForTest3_df.select("rawPrediction").collect()[0].rawPrediction
print(scores)

probs = [1-(1.0*s)/np.sum(scores) for s in scores]
#probs = [exp(np.max(scores) - s) for s in scores]
print(probs)
print(np.argsort(scores))
entitiesForTest3_df.show()

['Animation', 'Talk-Show', 'Thriller', 'Adult', 'War', 'Horror', 'Documentary', 'NA', 'Biography', 'Comedy', 'Western', 'Fantasy', 'Romance', 'Family', 'Drama', 'Short', 'Sport', 'History', 'Film-Noir', 'Reality-TV', 'Music', 'Mystery', 'Musical', 'Sci-Fi', 'Game-Show', 'Adventure', 'Crime', 'Action', 'News']
[-537.7891618608271,-587.9934842823056,-518.4590082407616,-585.93812581499,-543.6674228395556,-538.8842749128711,-553.6583065705015,-582.2730094151456,-537.4807382920243,-514.2212682051768,-556.3789659243172,-523.747313855792,-513.583636157141,-530.9484400011238,-506.7379617621209,-558.7893495978734,-554.9934586069586,-546.6821482616766,-561.8042696801848,-584.53159521691,-552.8948283999592,-534.1307261361959,-544.3075497937868,-529.3509095570462,-586.2027202433444,-518.7623805869365,-523.9627650786558,-514.3693191457652,-588.0464268898968]
[0.9660284817794389, 0.9628571329036251, 0.9672495451858455, 0.9629869675164696, 0.9656571588445305, 0.9659593047568444, 0.9650260462956444, 0