In [8]:
import requests
import pandas as pd
import numpy as np
import numpy.random as rd
from datetime import datetime

In [9]:
def data_clearing(data: list, product_unit=None, product_kg=None) -> pd.DataFrame:
    '''
    Clean raw sale records and return them as a DataFrame.

    For every record the POSIX timestamp stored under ``date`` is converted
    (in local time) to a ``dd-mm-YYYY`` string, and the derived fields
    ``week`` (``%U`` as a zero-padded string), ``month`` and ``year`` are
    added. Every remaining key except ``id`` is treated as a product quantity:

    * keys that are neither in ``product_unit`` nor in ``product_kg`` are
      dropped;
    * quantities that are not numeric, not finite or not greater than zero
      are discarded, so the product ends up with the default 0 / 0.0;
    * unit products are cast to ``int`` (truncating), kg products to
      ``float``;
    * products missing from a record are filled with 0 (unit) or 0.0 (kg).

    The input list and its dictionaries are never modified, so the function
    can safely be called again on the same raw data.

    : param data: list of sale dictionaries, each holding at least ``date``
    : param product_unit: names of the products counted in units; defaults
        to the module-level PRODUCT_UNIT
    : param product_kg: names of the products measured as floats; defaults
        to the module-level PRODUCT_KG
    : return: a pd.DataFrame with one row per sale; for empty ``data`` an
        empty frame that still carries the expected columns
    '''
    if product_unit is None:
        product_unit = PRODUCT_UNIT
    if product_kg is None:
        product_kg = PRODUCT_KG

    # Keys that are metadata rather than product quantities. The derived date
    # fields are listed too so a record that already carries them is not
    # mistaken for an unknown product (which would pop the derived value).
    metadata_keys = ('id', 'date', 'week', 'month', 'year')

    records = []
    for sale in data:
        moment = datetime.fromtimestamp(sale['date'])     # parse the timestamp once
        record = dict(sale)                               # work on a copy, keep the input intact
        record['date'] = moment.strftime("%d-%m-%Y")
        record['week'] = moment.strftime("%U")
        record['month'] = moment.month
        record['year'] = moment.year

        for key, value in sale.items():
            if key in metadata_keys:
                continue
            if key not in product_unit and key not in product_kg:
                record.pop(key)                           # unknown product
                continue
            try:
                quantity = float(value)
            except (TypeError, ValueError, OverflowError):
                quantity = 0.0                            # non-numeric quantity counts as "no sale"
            if not 0 < quantity < float('inf'):           # also rejects NaN
                record.pop(key)                           # refilled with the default below
            elif key in product_unit:
                record[key] = int(quantity)
            else:
                record[key] = quantity

        for name in product_unit:
            record.setdefault(name, 0)
        for name in product_kg:
            record.setdefault(name, 0.0)

        records.append(record)

    if not records:
        # Keep the schema so callers can still select the expected columns.
        columns = ['id', 'date', 'week', 'month', 'year']
        columns += list(product_unit) + list(product_kg)
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(records)

In [10]:
def request_server(url: str = 'http://localhost:3000/api/ep1', timeout: float = 10.0):
    '''
    Fetch the sales from the API and return them cleaned.

    The JSON payload returned by ``url`` is passed through ``data_clearing``
    and the resulting columns are arranged as listed in COLUMNS_ORDER.

    : param url: endpoint that serves the sales as JSON
    : param timeout: seconds to wait for the server before giving up, so an
        unresponsive server cannot block the notebook forever
    : return: a pd.DataFrame with the cleaned sales
    : raises requests.HTTPError: when the server answers with a 4xx/5xx status
    : raises requests.Timeout: when the server does not answer in ``timeout``
    '''
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of cleaning an error payload.
    response.raise_for_status()
    sales_week = data_clearing(response.json())
    return sales_week[COLUMNS_ORDER]

In [11]:
def all_sales(df_sales_week, file: str = 'all_sales.csv'):
    '''
    Append the given sales to the cumulative CSV file.

    When ``file`` does not exist yet, or exists but is empty, the header row
    is written first; afterwards the rows of ``df_sales_week`` are appended
    without index and without header.

    : param df_sales_week: a pd.DataFrame with the sales to store
    : param file: path of the CSV file that accumulates every sale
    : return: None
    '''
    header = 'id,date,week,month,year,prod_1,prod_2,prod_3,prod_4,prod_5,prod_6,prod_7,prod_8,prod_9,prod_10,prod_11,prod_12,prod_13,prod_14,prod_15,prod_16,prod_17,prod_18,prod_19,prod_20\n'

    # Peek at a single character instead of parsing the whole file: we only
    # need to know whether a header still has to be written.
    try:
        with open(file) as existing:
            has_content = bool(existing.read(1))
    except FileNotFoundError:
        has_content = False

    if not has_content:
        with open(file, 'w') as f:                      # honour ``file``, not a fixed name
            f.write(header)

    df_sales_week.to_csv(file, mode='a', index=False, header=False)   # Saving the data in a csv file
  

In [45]:
# prod_1 .. prod_7 are the products data_clearing casts to int,
# prod_8 .. prod_20 the ones it casts to float.
PRODUCT_UNIT = [f"prod_{number}" for number in range(1, 8)]
PRODUCT_KG = [f"prod_{number}" for number in range(8, 21)]
COLUMNS_ORDER = ['id', 'date', 'week', 'month', 'year', *PRODUCT_UNIT, *PRODUCT_KG]

# Fetch the sales, append them to the cumulative CSV and preview the result.
df_sales_week = request_server()
all_sales(df_sales_week)
df_sales_week.head(2)

Unnamed: 0,id,date,week,month,year,prod_1,prod_2,prod_3,prod_4,prod_5,...,prod_11,prod_12,prod_13,prod_14,prod_15,prod_16,prod_17,prod_18,prod_19,prod_20
0,lacvp2q7twf4yqbd5to,12-02-2020,6,2,2020,17,46,20,12,20,...,0.0,0.0,0.0,0.0,557.50021,0.0,0.0,0.0,0.0,0.0
1,lacvp2q77uss7uw0i02,12-02-2020,6,2,2020,48,27,19,0,36,...,381.588809,0.0,0.0,0.0,44.187491,0.0,0.0,0.0,0.0,0.0


In [34]:
def monthly_sales(df_sales_week, url: str ='http://localhost:3000/api/ep1', file: str ='monthly_sales.csv'):
    '''
    Function for monthly sales

    Sums the numeric columns of ``df_sales_week`` per (month, year), adds the
    totals already stored in ``file`` (if any) and rewrites ``file`` with the
    updated totals. A missing or empty ``file`` is treated as "no totals
    stored yet". Only the columns prod_1 .. prod_20 are kept, in that order,
    so the rows written always match the header; a product column that is
    absent from the data is written as 0.

    : param df_sales_week: a pd.DataFrame with the sales to add; it must
        contain the ``month`` and ``year`` columns
    : param url: not used; kept so existing calls keep working
    : param file: file for the monthly sales
    : return: a pd.DataFrame indexed by (month, year) with the updated totals
    '''
    keys = ['month', 'year']
    product_columns = ['prod_%d' % number for number in range(1, 21)]

    try:
        stored = pd.read_csv(file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        stored = None                                   # nothing accumulated so far

    totals = df_sales_week.groupby(keys).sum(numeric_only=True)          # Grouping by date
    if stored is not None and not stored.empty:
        stored_totals = stored.groupby(keys).sum(numeric_only=True)
        totals = stored_totals.add(totals, fill_value=0)

    # Pin the schema: drops stray numeric columns and undoes any column
    # re-sorting introduced by aligning frames with different columns.
    totals = totals.reindex(columns=product_columns, fill_value=0)

    # One write to the requested path; header is month,year,prod_1..prod_20.
    totals.to_csv(file, index_label=keys)               # Saving the data in a csv file
    return totals

In [46]:
# Fold this week's sales into the stored monthly totals.
df_monthly_sales = monthly_sales(df_sales_week)

  df_sales = df_sales_week.groupby(['month','year']).sum()
