Skip to content

Commit

Permalink
win2
Browse files Browse the repository at this point in the history
  • Loading branch information
U-US\gtesei authored and U-US\gtesei committed Mar 18, 2016
1 parent 71790ea commit c7fa373
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
output = pd.DataFrame(data={"id":test["id"],"sentiment":result})

# Use pandas to write the comma-separated output file
output.to_csv('Bag_of_Words_model.csv', index=False, quoting=3)
output.to_csv('Bag_of_Words_model.tsv', index=False, quoting=3)
print("Wrote results to Bag_of_Words_model.csv")


Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def makeFeatureVec(words, model, num_features):
featureVec = np.divide(featureVec,nwords)
return featureVec

def hash32(value):
    """Fold the builtin hash of *value* into an unsigned 32-bit integer.

    Used as the ``hashfxn`` argument to gensim's ``Word2Vec``: the model
    expects a hash that fits in 32 bits, while Python's native ``hash``
    may be 64-bit and negative, so the result is masked down to the
    range ``0 .. 0xFFFFFFFF``.

    NOTE(review): on Python 3 the builtin ``hash`` of ``str`` is salted
    per interpreter run unless ``PYTHONHASHSEED`` is fixed — presumably
    the reproducibility this function aims for relies on that being set;
    confirm in the run environment.
    """
    full_hash = hash(value)
    return full_hash & 0xFFFFFFFF

def getAvgFeatureVecs(reviews, model, num_features):
# Given a set of reviews (each one a list of words), calculate
Expand Down Expand Up @@ -137,7 +139,7 @@ def getCleanReviews(reviews):
print("Training Word2Vec model...")
model = Word2Vec(sentences, workers=num_workers,
size=num_features, min_count = min_word_count,
window = context, sample = downsampling, seed=1)
window = context, sample=downsampling, seed=1 , hashfxn=hash32)

# If you don't plan to train the model any further, calling
# init_sims will make the model much more memory-efficient.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def create_bag_of_centroids( wordlist, word_centroid_map ):
print('>>> Loading Windows env ...')
os.chdir('C:/Machine_Learning/git/fast-furious/doc_ref/NLP/word2vec-nlp-tutorial/')

model = Word2Vec.load("300features_40minwords_10context")
model = Word2Vec.load("300features_40minwords_10context.tsv")


# ****** Run k-means on the word vectors and print a few clusters
Expand Down Expand Up @@ -95,8 +95,8 @@ def create_bag_of_centroids( wordlist, word_centroid_map ):
# Find all of the words for that cluster number, and print them out
words = []
for i in range(0,len(word_centroid_map.values())):
if( word_centroid_map.values()[i] == cluster ):
words.append(word_centroid_map.keys()[i])
if word_centroid_map.values()==cluster:
words.append(word_centroid_map.keys())
print(words)


Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
numpy==1.9.2
scipy
scikit-learn
nltk
pandas==0.16.0
numpy==1.9.2
scipy
scikit-learn
nltk
pandas==0.16.0
beautifulsoup4

0 comments on commit c7fa373

Please sign in to comment.