Skip to content

Commit

Permalink
import version used for paper
Browse files Browse the repository at this point in the history
  • Loading branch information
entron committed Apr 15, 2016
1 parent 26e7406 commit bfdf61c
Show file tree
Hide file tree
Showing 47 changed files with 1,004 additions and 20,066 deletions.
44 changes: 0 additions & 44 deletions README.md

This file was deleted.

51 changes: 51 additions & 0 deletions calculate_metric.py
@@ -0,0 +1,51 @@
# Compute the distance between two stores based on the definition in the paper.

import pickle
import random
import numpy

# Load the pickled (X, y) pair. A context manager is used so the file handle
# is closed as soon as the data has been read.
# NOTE(review): pickle.load executes arbitrary code from the file; only run
# this against pickles produced by this project's own pipeline.
with open('feature_train_data.pickle', 'rb') as f:
    (X, y) = pickle.load(f)

# One dict per store: tuple(feature[2:7]) -> sale.
# Assumes feature[1] is a 0-based store index below 1115 -- TODO confirm
# against the code that builds feature_train_data.pickle.
dictlist = [{} for _ in range(1115)]
for feature, sale in zip(X, y):
    store = feature[1]
    dictlist[store][tuple(feature[2:7])] = sale

with open("embeddings.pickle", 'rb') as f:
    embeddings = pickle.load(f)

# The first entry of the loaded object is used as the store embeddings.
store_embeddings = embeddings[0]


def distance(store_pairs, dictlist):
    '''Distance as defined in the paper.

    Computes the mean absolute difference between the values stored for the
    two stores, taken over the keys that appear in both stores' dicts.

    store_pairs: a pair (a, b) of indices into dictlist.
    dictlist: sequence of dicts, one per store, mapping a key to a number.

    Returns the mean absolute difference as a float, or NaN when the two
    stores have no key in common (the mean over an empty set is undefined;
    previously this case raised ZeroDivisionError).
    '''
    a, b = store_pairs
    sales_a = dictlist[a]
    sales_b = dictlist[b]
    absdiffs = [abs(sale - sales_b[key])
                for key, sale in sales_a.items()
                if key in sales_b]
    if not absdiffs:
        return float('nan')
    return sum(absdiffs) / float(len(absdiffs))


def embed_distance(store_pairs, em):
    '''Distance in the embedding space.

    store_pairs: a pair (a, b) of indices into em.
    em: indexable collection whose entries support subtraction.

    Returns numpy.linalg.norm of the difference between the two entries.
    '''
    first, second = store_pairs
    offset = em[first] - em[second]
    return numpy.linalg.norm(offset)

# Generate n random store pairs
n = 10000
pairs = set()
while len(pairs) < n:
    # Draw two distinct store indices; keep only draws that are already in
    # ascending order so each unordered pair has a single representation.
    a, b = random.sample(range(1115), 2)
    if a >= b:
        continue
    pairs.add((a, b))


# Calculate distances
# Write one line per sampled pair: the two distances separated by a space.
with open('distances.csv', 'w') as out:
    for store_pair in pairs:
        print(
            distance(store_pair, dictlist),
            embed_distance(store_pair, store_embeddings),
            file=out,
        )
226 changes: 0 additions & 226 deletions embedding_visualization.ipynb

This file was deleted.

625 changes: 0 additions & 625 deletions embedding_visualization_plotly.html

This file was deleted.

432 changes: 0 additions & 432 deletions embedding_visualization_plotly.ipynb

This file was deleted.

9 changes: 0 additions & 9 deletions extract.py → extract_csv_file.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
import pickle
import csv
from random import shuffle


def csv2dicts(csvfile):
Expand All @@ -28,7 +26,6 @@ def set_nan_as_string(data, replace_str='0'):

train_data = "train.csv"
store_data = "store.csv"
test_data = "test.csv"
store_states = 'store_states.csv'

with open(train_data) as csvfile:
Expand All @@ -39,12 +36,6 @@ def set_nan_as_string(data, replace_str='0'):
pickle.dump(data, f, -1)
print(data[:3])

with open(test_data) as csvfile:
data = csv.reader(csvfile, delimiter=',')
with open('test_data.pickle', 'wb') as f:
data = csv2dicts(data)
pickle.dump(data, f, -1)
print(data[0])

with open(store_data) as csvfile, open(store_states) as csvfile2:
data = csv.reader(csvfile, delimiter=',')
Expand Down
176 changes: 0 additions & 176 deletions extract_fb_features.py

This file was deleted.

38 changes: 0 additions & 38 deletions extract_google_trend.py

This file was deleted.

0 comments on commit bfdf61c

Please sign in to comment.