# In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import copy
import multiprocessing as mp
from matplotlib import pyplot as plt


def sumWeights(key, model, history):
	"""Score the locality vector stored under `key` against a history.

	The vector is read from model[0][key] and indexed as [scale, weights].
	Every article of history[0] that is present in `weights` contributes its
	weight to a running total, which is then divided by `scale`.

	Returns the tuple (key, scaled total).
	"""
	entry = model[0][key]
	total = 0
	#plain accumulation loop keeps the left-to-right addition order
	for weight in (entry[1][item] for item in history[0] if item in entry[1]):
		total += weight
	return (key, total / entry[0])

def _subOneExpDecayMask(a):
	"""Return the weights w_n = (1-a)^n, smallest first.

	Weights are generated from w_0 = 1 downwards while they stay at or above
	the 0.1 threshold, then reversed so the last entry (weight 1) lines up
	with the most recent article of a history window.

	a -- decay rate, expected in (0, 1).
	Raises ValueError when a <= 0: the weights would never fall below the
	threshold and the loop would not terminate.
	"""
	if a <= 0:
		raise ValueError("decay rate must be positive, got {}".format(a))
	mask = []
	weight = 1
	while weight >= 0.1:
		mask.append(weight)
		weight *= (1-a)
	mask.reverse()
	return mask


def _superOneExpDecayMask(a):
	"""Return the weights w_n = 2 - (1+a)^n, smallest first.

	Weights are generated from w_0 = 1 downwards while they stay at or above
	the 0.1 threshold, then reversed so the last entry (weight 1) lines up
	with the most recent article of a history window.

	a -- decay rate, expected to be positive.
	Raises ValueError when a <= 0: the weights would never fall below the
	threshold and the loop would not terminate.
	"""
	if a <= 0:
		raise ValueError("decay rate must be positive, got {}".format(a))
	mask = []
	growth = 1
	while 2-growth >= 0.1:
		mask.append(2 - growth)
		growth = growth * (1+a)
	mask.reverse()
	return mask


def _buildLocalityModel(customerTransactions, wMask):
	"""Build an article locality model for one weight mask.

	customerTransactions -- dict: customer -> list of articles in purchase order.
	wMask -- list of weights; wMask[k] is applied to position k of the window
	         holding the len(wMask) entries that precede the current article.

	The last purchase of every customer is left out (it is the evaluation
	target). Windows start filled with the dummy ids -(len-1) .. 0 so that
	"no purchase yet" positions are weighted as distinct entries.

	Returns dict: article -> [largest weight, dict: preceding entry -> weight].
	"""
	model = {}
	maskLength = len(wMask)
	for purchases in customerTransactions.values():
		window = list(range(1 - maskLength, 1))
		for article in purchases[:-1]:
			localeVector = model.setdefault(article, [0, {}])
			weights = localeVector[1]
			for neighbour, weight in zip(window, wMask):
				weights[neighbour] = weights.get(neighbour, 0) + weight
				#update max weight
				if weights[neighbour] > localeVector[0]:
					localeVector[0] = weights[neighbour]
			#slide the window forward by one purchase
			window = window[1:] + [article]
	return model


def _offerPrediction(predictions, candidate, replaceOnTie, limit=12):
	"""Offer candidate (article, weightSum) to an ascending top-`limit` list.

	predictions is modified in place and stays sorted by weightSum, lowest
	first, so predictions[0] is always the entry to beat once the list is
	full. replaceOnTie decides whether a candidate that merely equals the
	lowest kept score replaces it.
	"""
	if len(predictions) < limit:
		predictions.append(candidate)
		predictions.sort(key=lambda entry: entry[1])
		return
	lowest = predictions[0][1]
	if replaceOnTie:
		accepted = candidate[1] >= lowest
	else:
		accepted = candidate[1] > lowest
	if not accepted:
		return
	predictions[0] = candidate
	#bubble the new entry up until the list is ascending again
	index = 0
	while index < len(predictions)-1 and predictions[index][1] > predictions[index+1][1]:
		predictions[index], predictions[index+1] = predictions[index+1], predictions[index]
		index += 1


def _evaluateModel(model, masklength, customerTransactions, replaceOnTie=True):
	"""Predict the last purchase of every customer and count the hits.

	For each customer the masklength entries preceding the last purchase
	(padded on the left with the dummy ids used while building the model)
	are scored against every article vector of the model; the 12 best
	scoring articles form the prediction.

	Returns (results, predictions):
	results     -- [predicted, missed] counters
	predictions -- list of (customer, "a1 a2 ... ") tuples, articles in
	               ascending score order, each followed by a space
	"""
	results = [0, 0]	#predicted/missed
	customerPredictions = []
	dummies = list(range(1 - masklength, 1))
	for customerCount, (customer, purchases) in enumerate(customerTransactions.items(), start=1):
		if customerCount % 1000 == 0:
			print(customerCount)
		test = purchases[-1]
		context = (dummies + purchases)[-masklength-1:-1]
		predictions = []	#tuples of (article#, weightSum)
		for key, vector in model.items():
			weights = vector[1]
			weightSum = 0
			for article in context:
				if article in weights:
					weightSum += weights[article]
			_offerPrediction(predictions, (key, weightSum), replaceOnTie)
		if any(entry[0] == test for entry in predictions):
			results[0] += 1
		else:
			results[1] += 1
		predictionString = "".join(str(entry[0]) + " " for entry in predictions)
		customerPredictions.append((customer, predictionString))
	return results, customerPredictions


def main():
	"""Build, evaluate and export the article locality models.

	Reads 'transactions25short.csv' (columns customer_id, article_id), builds
	one model per decay rate for both mask families plus a base model with a
	single-entry mask, evaluates how often the last purchase of a customer is
	among the 12 predicted articles, and writes:
	  - one prediction CSV per model into 'featurescorePredictions/'
	    (the directory is created when missing)
	  - the hit/miss summary into 'TFScoreResults2.txt'
	"""
	import os

	transactions = pd.read_csv('transactions25short.csv')

	#build customer transaction clusters
	customerTransactions = {}
	for cust, article in zip(transactions['customer_id'], transactions['article_id']):
		customerTransactions.setdefault(cust, []).append(article)

	#models: article# -> (largest weight, dict: article#, weight)
	subMaskDecayRates = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.91]
	superMaskDecayRates = [0.15, 0.2, 0.3, 0.4, 0.5, 0.91]

	subModels = []
	subMaskLengths = []
	for rate in subMaskDecayRates:
		print("Sub {}".format(rate))
		wMask = _subOneExpDecayMask(rate)
		subModels.append(_buildLocalityModel(customerTransactions, wMask))
		subMaskLengths.append(len(wMask))

	superModels = []
	superMaskLengths = []
	for rate in superMaskDecayRates:
		print("Super {}".format(rate))
		wMask = _superOneExpDecayMask(rate)
		superModels.append(_buildLocalityModel(customerTransactions, wMask))
		superMaskLengths.append(len(wMask))

	#input length 1
	print("Base")
	baseModel = _buildLocalityModel(customerTransactions, [1.0])

	#eval for models
	subModelResults = []
	subModelPredictions = []
	for i, model in enumerate(subModels):
		print("Sub model results {} of {}".format(i+1, len(subModels)))
		results, predictions = _evaluateModel(model, subMaskLengths[i], customerTransactions)
		subModelResults.append(results)
		subModelPredictions.append(predictions)

	superModelResults = []
	superModelPredictions = []
	for i, model in enumerate(superModels):
		print("Super model results {} of {}".format(i+1, len(superModels)))
		results, predictions = _evaluateModel(model, superMaskLengths[i], customerTransactions)
		superModelResults.append(results)
		superModelPredictions.append(predictions)

	#baseModel: a candidate must strictly beat the lowest kept score
	print("Base model results")
	baseModelResults, baseModelPredictions = _evaluateModel(baseModel, 1, customerTransactions, replaceOnTie=False)

	#make sure the export directory exists before writing into it
	os.makedirs('featurescorePredictions', exist_ok=True)

	for rate, rows in zip(subMaskDecayRates, subModelPredictions):
		predictionList = pd.DataFrame(data=rows, columns=['customer_id', 'prediction'])
		predictionList.to_csv('featurescorePredictions/subModelPredictionsv2{}.csv'.format(rate))
	for rate, rows in zip(superMaskDecayRates, superModelPredictions):
		predictionList = pd.DataFrame(data=rows, columns=['customer_id', 'prediction'])
		predictionList.to_csv('featurescorePredictions/superModelPredictionsv2{}.csv'.format(rate))

	predictionList = pd.DataFrame(data=baseModelPredictions, columns=['customer_id', 'prediction'])
	predictionList.to_csv('featurescorePredictions/baseModelPredictionsv2.csv')

	with open('TFScoreResults2.txt', 'w') as resultsFile:
		resultsFile.write("SubModel Results:\n")
		for i in range(len(subModels)):
			resultsFile.write("inputLength: {}  | decayRate: {}  | predicted: {}  | missed: {}\n".format(subMaskLengths[i], subMaskDecayRates[i], subModelResults[i][0], subModelResults[i][1]))

		resultsFile.write("\nSuperModel Results:\n")
		for i in range(len(superModels)):
			resultsFile.write("inputLength: {}  | decayRate: {}  | predicted: {}  | missed: {}\n".format(superMaskLengths[i], superMaskDecayRates[i], superModelResults[i][0], superModelResults[i][1]))

		resultsFile.write("\nBaseModel Results:\n")
		resultsFile.write("inputLength: {}  | predicted: {}  | missed: {}\n".format(1, baseModelResults[0], baseModelResults[1]))



#run main() only when this file is executed directly, not when it is imported
if __name__ == '__main__':
	main()
