In [16]:
import requests
import json
import numpy as np
from collections import OrderedDict

def array_to_hash(x):
    """Hash a sequence of numbers after converting every element to ``float``.

    Parameters
    ----------
    x : list, tuple or numpy.ndarray
        Values to hash. Arrays are flattened first; lists and tuples are used
        as given, so each of their elements must be accepted by ``float``.
        Subclasses of these types (e.g. namedtuples) are accepted too.

    Returns
    -------
    int
        ``hash`` of the tuple of float-converted elements.

    Raises
    ------
    RuntimeError
        If ``x`` is not a list, a tuple or a numpy array.
    """
    if isinstance(x, np.ndarray):
        # np.asarray drops ndarray subclasses whose ravel() stays 2-D,
        # so the result is always a flat list of scalars.
        elements = np.asarray(x).ravel().tolist()
    elif isinstance(x, (list, tuple)):
        elements = x
    else:
        raise RuntimeError('unexpected type of input: {}'.format(type(x)))
    return hash(tuple(map(float, elements)))

def almostEqual(x, y, tol=1e-3):
    """Return True when ``x`` and ``y`` differ by strictly less than ``tol``.

    Parameters
    ----------
    x, y : numbers
        Values to compare; they must support subtraction and ``abs``.
    tol : float, optional
        Absolute tolerance. Defaults to ``1e-3``.

    Returns
    -------
    bool
        ``abs(x - y) < tol``.
    """
    return abs(x - y) < tol


class Grader(object):
    """Records answers for the assignment parts and submits them to Coursera."""

    def __init__(self):
        # Endpoint that receives the JSON submission.
        self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
        # Sent as "assignmentKey" in the submission payload.
        self.assignment_key = 'S1UqVXp-EeelpgpYPAO2Og'
        # part id -> task tag, kept in display order.
        self.parts = OrderedDict([
                    ('edAEq', 'max_revenue'),
                    ('Xn0Ec', 'category_id_with_max_revenue'),
                    ('CZDVZ', 'num_items_constant_price'),
                    ('HlAjc', 'total_num_items_sold_var')])
        # part id -> recorded answer; None until the part is answered.
        self.answers = {key: None for key in self.parts}

    @staticmethod
    def ravel_output(output):
        '''
           If the student accidentally submitted an np.array with one
           element instead of a number, return that number instead.
           Any other value is returned unchanged.
        '''
        if isinstance(output, np.ndarray) and output.size == 1:
            output = output.item(0)
        return output

    def submit(self, email, token):
        '''
           POST every recorded answer to ``self.submission_page`` and
           print the outcome.

           email -- sent as "submitterEmail"
           token -- sent as "secret"

           Parts without an answer are still listed, with an empty payload.
        '''
        submission = {
                    "assignmentKey": self.assignment_key,
                    "submitterEmail": email,
                    "secret": token,
                    "parts": {}
                  }
        for part, output in self.answers.items():
            submission["parts"][part] = dict() if output is None else {"output": output}
        reply = requests.post(self.submission_page, data=json.dumps(submission))
        if reply.status_code == 201:
            # Success needs nothing from the body, so it is not parsed.
            print('Submitted to Coursera platform. See results on assignment page!')
            return
        try:
            response = reply.json()
        except ValueError:
            # The failure body is not JSON; fall back to showing the raw text.
            response = reply.text
        details = response.get(u'details') if isinstance(response, dict) else None
        if isinstance(details, dict) and u'learnerMessage' in details:
            print(details[u'learnerMessage'])
        else:
            print("Unknown response from Coursera: {}".format(reply.status_code))
            print(response)

    def status(self):
        '''Print the answer currently recorded for every task.'''
        print("You want to submit these numbers:")
        for part_id, part_name in self.parts.items():
            answer = self.answers[part_id]
            if answer is None:
                # Placeholder shown for parts that have no answer yet.
                answer = '-'*10
            print("Task {}: {}".format(part_name, answer))

    def submit_part(self, part, output):
        '''
           Record ``output`` as the answer for part id ``part``.

           Raises KeyError if ``part`` is not a known part id; nothing
           is recorded in that case.
        '''
        if part not in self.parts:
            # Reject before writing so an unknown id never ends up in the payload.
            raise KeyError(part)
        self.answers[part] = output
        print("Current answer for task {} is: {}".format(self.parts[part], output))

    def submit_tag(self, tag, output):
        '''
           Record ``output`` for the part whose tag equals ``tag``.

           The value is passed through ``ravel_output`` and stored as a string.
           Raises RuntimeError unless exactly one part has that tag.
        '''
        part_id = [k for k, v in self.parts.items() if v == tag]
        if len(part_id) != 1:
            raise RuntimeError('cannot match tag with part_id: found {} matches'.format(len(part_id)))
        self.submit_part(part_id[0], str(self.ravel_output(output)))

In [17]:
import numpy as np
import pandas as pd
import os
# Single grader instance; the answer cells below record their results on it.
grader = Grader()

In [18]:
# Directory holding the competition CSV files.
DATA_FOLDER = "../input/competitive-data-science-predict-future-sales"

# Read the four tables in one pass; the order of the file names matches the targets.
transactions, items, item_categories, shops = (
    pd.read_csv(os.path.join(DATA_FOLDER, csv_name))
    for csv_name in ("sales_train.csv", "items.csv", "item_categories.csv", "shops.csv")
)

In [19]:
# Preview the parsed dates. The explicit day.month.year format (the same one used
# when the year/month columns are derived) keeps day and month from being guessed.
pd.to_datetime(transactions.date, format="%d.%m.%Y")

Print the shape of each loaded dataframe and use the [`df.head`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.head.html) function to print several rows. Examine the features you are given.

In [20]:
# Report the shape of each loaded frame, one labelled line per table.
print("Data shapes:")
display("transactions " + str(transactions.shape))
display("items " + str(items.shape))
display("item_categories " + str(item_categories.shape))
display("shops " + str(shops.shape))

In [21]:
# Show each table's label followed by its first five rows.
print("Data heads:")
display("transactions ")
display(transactions.head(n=5))
display("items ")
display(items.head(n=5))
display("item_categories ")
display(item_categories.head(n=5))
display("shops ")
display(shops.head(n=5))

What was the maximum total revenue among all the shops in September, 2014?

In [22]:
# Parse the day.month.year date strings once and derive both calendar columns from that parse.
sale_dates = pd.to_datetime(transactions["date"], format="%d.%m.%Y")
transactions["year"] = sale_dates.dt.year
transactions["month"] = sale_dates.dt.month
transactions["revenue"] = transactions["item_price"] * transactions["item_cnt_day"]

# Keep September 2014 only.
cf = (transactions.year == 2014) & (transactions.month == 9)
# Total revenue per shop, then the largest of those totals (no sort needed).
max_revenue = transactions.loc[cf].groupby("shop_id")["revenue"].sum().max()
grader.submit_tag('max_revenue', max_revenue)

What item category generated the highest revenue in summer 2014?

In [23]:
# Keep June-August 2014 only.
cf = (transactions.year == 2014) & (transactions.month.isin([6, 7, 8]))
# Attach each sale's category by matching on the item_id *column* of both frames.
# DataFrame.join(on=...) would match against the index of `items` instead, which is
# only right when that index happens to equal item_id.
# NOTE(review): assumes `items` has `item_id` and `item_category_id` columns - confirm.
summer_sales = transactions.loc[cf, ["item_id", "revenue"]].merge(
    items[["item_id", "item_category_id"]], on="item_id", how="inner")
# Category whose summed revenue is largest.
category_id_with_max_revenue = summer_sales.groupby("item_category_id")["revenue"].sum().idxmax()
grader.submit_tag('category_id_with_max_revenue', category_id_with_max_revenue)

How many items have a price that stays constant (to the best of our knowledge) during the whole period of time?

In [24]:
# An item's price is constant when exactly one distinct price was ever recorded for it.
# Counting distinct values avoids testing a floating-point standard deviation for exact zero.
distinct_prices_per_item = transactions.groupby("item_id")["item_price"].nunique()
num_items_constant_price = (distinct_prices_per_item == 1).sum()
grader.submit_tag('num_items_constant_price', num_items_constant_price)

What was the variance of the daily number of items sold by the shop with shop_id = 25 in December 2014? Do not count items that were sold but later returned.

In [25]:
# Keep shop 25's sales in December 2014 only.
cf = (transactions.year == 2014) & (transactions.month == 12) & (transactions.shop_id == 25)
# Items sold per day.
# NOTE(review): presumably returns appear as negative item_cnt_day, so the daily sum
# nets them out - confirm against the data description.
total_num_items_sold = transactions.loc[cf].groupby("date")["item_cnt_day"].sum()
# Sample variance of the daily totals (ddof=1 is the pandas default, spelled out here).
total_num_items_sold_var = total_num_items_sold.var(ddof=1)
grader.submit_tag('total_num_items_sold_var', total_num_items_sold_var)

In [26]:
# Credentials handed to grader.submit; left blank in this notebook.
STUDENT_TOKEN = ""
STUDENT_EMAIL = ""
# Print the answer currently recorded for every task.
grader.status()

In [27]:
# Send all recorded answers to Coursera; the grader prints the outcome.
grader.submit(STUDENT_EMAIL, STUDENT_TOKEN)