In [1]:
# Import pandas library
import pandas as pd
import decimal as D

# Configure the active decimal context once for the rest of the script:
# arithmetic keeps 33 significant digits and rounding uses ROUND_HALF_UP.
_decimal_context = D.getcontext()
_decimal_context.prec = 33
_decimal_context.rounding = D.ROUND_HALF_UP

# Read in our actual stock prices from a pipe-delimited file without a header
# row. The price column (index 2) is converted with D.Decimal as it is parsed,
# so prices are held as Decimal objects rather than floats.
actual = pd.read_table("../input/actual.txt", delimiter="|", header=None, converters={2: D.Decimal})
actual.columns = ["time", "stock", "price"]
# sort_values returns a new DataFrame instead of sorting in place, so the
# result must be assigned back for the rows to be ordered by time, then stock.
actual = actual.sort_values(by=["time", "stock"])

# Read in our predicted stock prices from a pipe-delimited file without a
# header row. The price column (index 2) is converted with D.Decimal as it is
# parsed, so prices are held as Decimal objects rather than floats.
pred = pd.read_table("../input/predicted.txt", delimiter="|", header=None, converters={2: D.Decimal})
pred.columns = ["time", "stock", "price"]
# sort_values returns a new DataFrame instead of sorting in place, so the
# result must be assigned back for the rows to be ordered by time, then stock.
pred = pred.sort_values(by=["time", "stock"])

# Create a merged dataset by using a SQL-style inner join on (time, stock).
# Will disregard any actual or predicted data rows that do not have a match in
# the other dataset.
# Each price column is renamed *before* the merge so the labels cannot be
# swapped: positional renaming after the merge previously labelled the left
# operand's (pred) prices "actual" and the right operand's prices "predicted".
joined = pd.merge(
    pred.rename(columns={"price": "predicted"}),
    actual.rename(columns={"price": "actual"}),
    how="inner",
    on=["time", "stock"],
    sort=True,  # rows come back ordered by the join keys; no extra sort needed
)

# Absolute prediction error per (time, stock) row.
joined["error"] = abs(joined["predicted"] - joined["actual"])

# Debug aid: show the element type of the error column. Skipped when the join
# produced no rows, where indexing the first element would raise.
if not joined.empty:
    print(type(joined["error"].iloc[0]))

# Read in the window size as an integer.
# The context manager closes the file handle as soon as the value is read,
# instead of leaving it open for the rest of the run.
with open("../input/window.txt", "r") as window_file:
    window = int(window_file.read())

# Debug aid: restrict each frame to a hand-picked set of time values and
# print how many rows/columns survive in each one.
x = [54, 55, 56, 57]
actual_i = actual.loc[actual["time"].isin(x)]
pred_i = pred.loc[pred["time"].isin(x)]
joined_i = joined.loc[joined["time"].isin(x)]

for subset in (actual_i, pred_i, joined_i):
    print(subset.shape)

# Debug aid: show the element type stored in the actual price column.
first_actual_row = actual.iloc[0]
print(type(first_actual_row["price"]))

# Write one line per sliding time window to output.txt:
#   "<start>|<end>|<mean error, 2 decimals>"  when the window has joined rows
#   "<start>|<end>|NA"                         when it has none
# The file is opened with a context manager so it is closed even if an
# exception is raised part-way through the loop.
with open("output.txt", "w") as output:
    # Quantum used to round the mean error to two decimal places.
    two_places = D.Decimal("0.01")

    # Last time index seen in either input; it bounds the number of windows.
    last_time = max(max(actual["time"]), max(pred["time"]))

    # Iterate over the number of windows we have
    for i in range(1, last_time - window + 2):

        # consider which time indices are in the window
        win_indices = list(range(i, i + window))

        # subset our data to those lying completely in the time window
        df_i = joined[joined["time"].isin(win_indices)]
        n_rows = df_i.shape[0]

        # if we have predictions in the window, calculate and output the mean error
        if n_rows > 0:
            # Sum and divide explicitly rather than calling Series.mean():
            # the notebook's debug output shows mean() returns a numpy.float64
            # for this column, while sum()/count stays a Decimal.
            total_error = sum(df_i["error"])
            mean_error = D.Decimal(total_error / n_rows)
            # Round half-up explicitly so the result does not depend on the
            # global decimal context still being configured that way.
            rounded_error = mean_error.quantize(two_places, rounding=D.ROUND_HALF_UP)
            output.write("|".join([str(i), str(i + window - 1), "{:.2f}".format(rounded_error)]) + "\n")

            # Debug aid for one hand-picked window. Kept inside the non-empty
            # branch so an empty window can no longer trigger a division by zero.
            if i == 54:
                print(mean_error)
                print(df_i["error"].mean())
                print(type(df_i["error"].mean()))
                print(type(total_error))
                print(type(total_error / n_rows))
                print(type(mean_error))
        # if we do not have any predictions in the window, output NA
        else:
            output.write("|".join([str(i), str(i + window - 1), "NA"]) + "\n")

# Scratch check of two-decimal rounding on a sample mean (11.42 / 326).
# The operand is built from a string and divided as a Decimal so the division
# is not carried out in binary floating point before the conversion.
print(round(D.Decimal("11.42") / 326, 2))

<class 'decimal.Decimal'>
(396, 3)
(326, 3)
(326, 5)
<class 'decimal.Decimal'>
0.0350306748466257668711656441717791
0.03503067484662577
<class 'numpy.float64'>
<class 'decimal.Decimal'>
<class 'decimal.Decimal'>
<class 'decimal.Decimal'>
0.04
