## Time 15

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()
from pyspark.sql import Window
from pyspark.sql import functions as F
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

from project_lib import Project
# NOTE(review): the project id and access token are hardcoded in this call; if the
# notebook is shared, the token should presumably be rotated and injected at runtime — confirm.
# `sc` is not defined anywhere in this notebook; presumably the kernel provides it — verify.
project = Project(sc, '0b89d6cb-bbe8-4e28-b211-c123c318d187', 'p-0238cf25b9be2b55b2c1531b8f35d3d44725ed84')
pc = project.project_context


import os
import warnings

# Object-storage connection settings.
# The two secrets (service id and API key) are read from environment variables of
# the same name instead of being committed with the notebook; the remaining
# entries are plain configuration and stay inline.
credentials = {
    'IAM_SERVICE_ID': os.environ.get('IAM_SERVICE_ID'),
    'IBM_API_KEY_ID': os.environ.get('IBM_API_KEY_ID'),
    'ENDPOINT': 'https://s3-api.us-geo.objectstorage.service.networklayer.com',
    'IBM_AUTH_ENDPOINT': 'https://iam.cloud.ibm.com/oidc/token',
    'BUCKET': 'hackathon-donotdelete-pr-xe1bajv3coopx1',
    'FILE': 'olist_customers_dataset.csv'
}

# Warn right here rather than letting the first download fail with an opaque auth error.
_missing_secrets = [k for k in ('IAM_SERVICE_ID', 'IBM_API_KEY_ID') if not credentials[k]]
if _missing_secrets:
    warnings.warn(
        'Missing environment variable(s): ' + ', '.join(_missing_secrets)
        + ' - object storage calls will fail until they are set.'
    )

from ibm_botocore.client import Config
import ibm_boto3
# S3-style client used by the download_file calls below; every connection
# setting is taken from the `credentials` dict.
cos = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=credentials['IBM_API_KEY_ID'],
    ibm_service_instance_id=credentials['IAM_SERVICE_ID'],
    ibm_auth_endpoint=credentials['IBM_AUTH_ENDPOINT'],
    config=Config(signature_version='oauth'),
    endpoint_url=credentials['ENDPOINT'])

Waiting for a Spark session to start...
Spark Initialization Done! ApplicationId = app-20200803022509-0000
KERNEL_ID = 5b5be10d-dd1f-4327-832b-a92eda861e71


## Funções 

In [2]:
# checks for columns that contain nulls
def check_null_col(df):
    """Return the name of the first column of ``df`` that contains nulls.

    The total row count is compared with the per-column non-null counts taken
    from the first row of ``df.describe()``. The summary is computed once and
    looked up by column name (the previous version recomputed it for every
    column and always read the first column's count).

    Columns that do not appear in the ``describe()`` output are skipped, so
    nulls in such columns are not reported.

    Parameters
    ----------
    df : Spark DataFrame to inspect.

    Returns
    -------
    The first column name (in ``df.columns`` order) whose non-null count is
    lower than the row count, or ``None`` when no such column is found.
    """
    qtd_linhas = df.count()
    # first row of describe() carries the non-null count of each summarised column
    contagens = df.describe().collect()[0].asDict()
    for c in df.columns:
        qtd_linhas_col = contagens.get(c)
        if qtd_linhas_col is None:
            # column not covered by describe(); nothing to compare against
            continue
        if qtd_linhas > int(qtd_linhas_col):
            return c
    return None

# removes duplicated rows when there are any
def check_line_duplic(df):
    """Return ``df`` without duplicated rows.

    The row count is compared with the distinct-row count; when they differ
    the de-duplicated frame is returned, otherwise ``df`` itself is returned
    unchanged.
    """
    total = df.count()
    unicas = df.distinct().count()
    tem_duplicadas = total > unicas
    return df.dropDuplicates() if tem_duplicadas else df

## Data preparation

In [3]:
## IMPORTING DATASETS

def load_csv_dataset(key):
    """Download ``key`` from the project bucket and read it as a Spark DataFrame.

    The object is saved locally under the same name and read as CSV with a
    header row. No schema is passed to the reader.
    """
    cos.download_file(Bucket=credentials['BUCKET'], Key=key, Filename=key)
    return spark.read.option('header', 'true').csv(key)

# seller information
df_vendedor = load_csv_dataset('olist_sellers_dataset.csv')

# geolocation information
df_geolocal = load_csv_dataset('olist_geolocation_dataset.csv')

# order item information
df_item_pedido = load_csv_dataset('olist_order_items_dataset.csv')

# payment information
df_pagamento = load_csv_dataset('olist_order_payments_dataset.csv')

# review information
# NOTE(review): a later cell drops rows whose review_id is null; if those rows come
# from multi-line quoted comments, the reader options may be the real fix — confirm.
df_avaliacao = load_csv_dataset('olist_order_reviews_dataset.csv')

# order information
df_pedido = load_csv_dataset('olist_orders_dataset.csv')

# product information
df_produto = load_csv_dataset('olist_products_dataset.csv')

# customer information
df_cliente = load_csv_dataset('olist_customers_dataset.csv')

# product category name translation table
df_depara_prod = load_csv_dataset('product_category_name_translation.csv')


In [4]:
## VERIFICANDO DUPLICIDADES

df_vendedor = check_line_duplic(df_vendedor)
df_geolocal = check_line_duplic(df_geolocal)
df_item_pedido = check_line_duplic(df_item_pedido)
df_pagamento = check_line_duplic(df_pagamento)
df_avaliacao = check_line_duplic(df_avaliacao)
df_pedido = check_line_duplic(df_pedido)
df_produto = check_line_duplic(df_produto)
df_cliente = check_line_duplic(df_cliente)
df_depara_prod = check_line_duplic(df_depara_prod)



In [5]:
## VERIFICANDO CAMPOS NULOS

# vendedor
check_null_col(df_vendedor)

In [6]:
# geolocalizacao
check_null_col(df_geolocal)

In [7]:
# order items
check_null_col(df_item_pedido)

In [None]:
# pagamento
check_null_col(df_pagamento)

In [None]:
# avaliacao
check_null_col(df_avaliacao)

In [None]:
df_avaliacao.filter(df_avaliacao.review_id.isNull()).show()

In [None]:
df_avaliacao = df_avaliacao.filter(df_avaliacao.review_id.isNotNull())

In [None]:
# pedido
check_null_col(df_pedido)

In [None]:
# produto
check_null_col(df_produto)

In [None]:
# cliente
check_null_col(df_cliente)

In [None]:
# depara produto

check_null_col(df_depara_prod)

In [None]:
## VERIFICANDO TIPOS DE COLUNAS

# vendedor
df_vendedor.dtypes

In [None]:
df_vendedor.show()

In [None]:
# geolocalizacao
df_geolocal.dtypes

In [None]:
df_geolocal.show()

In [None]:
# pagamento
df_pagamento.dtypes

In [None]:
df_pagamento.show()

In [None]:
# Give the payment columns numeric types.
# payment_value uses 'double': Spark's 'float' is 32-bit, which is too coarse
# for monetary amounts that are later summed.
df_pagamento = df_pagamento.select(
    'order_id', 'payment_sequential', 'payment_type',
    df_pagamento.payment_installments.cast('int'),
    df_pagamento.payment_value.cast('double'),
)

In [None]:
# item pedido
df_item_pedido.dtypes

In [None]:
df_item_pedido.show()

In [None]:
# Give the order-item columns proper types.
# price and freight_value use 'double': Spark's 'float' is 32-bit, which is too
# coarse for monetary amounts that are later summed.
df_item_pedido = df_item_pedido.select(
    'order_id', 'order_item_id', 'product_id', 'seller_id',
    df_item_pedido.shipping_limit_date.cast('timestamp'),
    df_item_pedido.price.cast('double'),
    df_item_pedido.freight_value.cast('double'),
)

In [None]:
# avaliacao
df_avaliacao.dtypes

In [None]:
df_avaliacao.show()

In [None]:
# Review score becomes an integer and both review dates become timestamps;
# the other columns are passed through as read.
df_avaliacao = df_avaliacao.select(
    'review_id',
    'order_id',
    df_avaliacao['review_score'].cast('int'),
    'review_comment_title',
    'review_comment_message',
    *[
        df_avaliacao[coluna].cast('timestamp')
        for coluna in ('review_creation_date', 'review_answer_timestamp')
    ]
)

In [None]:
# pedido
df_pedido.dtypes

In [None]:
df_pedido.show()

In [None]:
# Every date column of the orders table is converted to a timestamp;
# the identifier and status columns are passed through as read.
df_pedido = df_pedido.select(
    'order_id',
    'customer_id',
    'order_status',
    *[
        df_pedido[coluna].cast('timestamp')
        for coluna in (
            'order_purchase_timestamp',
            'order_approved_at',
            'order_delivered_carrier_date',
            'order_delivered_customer_date',
            'order_estimated_delivery_date',
        )
    ]
)

In [None]:
# product
df_produto.dtypes

In [None]:
df_produto.show()

In [None]:
# cliente
df_cliente.dtypes

In [None]:
df_cliente.show()

In [None]:
# depara_produto
df_depara_prod.dtypes

In [None]:
df_depara_prod.show()

In [None]:
df_depara_prod.show(df_depara_prod.count())

## Transformacoes

In [9]:
##  number of items sold per product category per customer state

# orders joined with their customer; the duplicated customer_id is dropped
df_pedido_estado = (
    df_pedido
    .join(df_cliente, df_pedido.customer_id == df_cliente.customer_id)
    .drop(df_cliente.customer_id)
)

# item rows joined with their product, counted per (order, category)
df_podutos_pedido = (
    df_item_pedido
    .join(df_produto, df_item_pedido.product_id == df_produto.product_id)
    .drop(df_produto.product_id)
    .groupBy('order_id', 'product_category_name')
    .count()
)

# the per-order counts are summed per (state, category)
df_pedido_prod_estado = (
    df_pedido_estado
    .join(df_podutos_pedido, df_pedido_estado.order_id == df_podutos_pedido.order_id)
    .drop(df_podutos_pedido.order_id)
    .groupBy('customer_state', 'product_category_name')
    .sum('count')
    .withColumnRenamed('sum(count)', 'qtd_produto')
)

# sorted by state, largest quantity first within each state
df_qtd_vend_prod_estado = df_pedido_prod_estado.orderBy(
    df_pedido_prod_estado.customer_state,
    df_pedido_prod_estado.qtd_produto.desc()
)


## Transformacoes para MVP Tableau

In [None]:
# join pedido com item pedido
df_podutos_pedido_mvp = df_item_pedido.join(df_produto, df_item_pedido.product_id == df_produto.product_id)\
.drop(df_produto.product_id)

df_podutos_pedido_mvp.count()

In [None]:
# keep only the categories of the MVP store segment
df_podutos_pedido_mvp = df_podutos_pedido_mvp.filter(
    df_podutos_pedido_mvp.product_category_name.isin(
        ['cama_mesa_banho', 'beleza_saude', 'esporte_lazer']
    )
)
df_podutos_pedido_mvp.count()

In [None]:
# join com endereco do comprador
df_pedido_estado_mvp = df_podutos_pedido_mvp.join(df_pedido_estado, df_podutos_pedido_mvp.order_id == df_pedido_estado.order_id)\
.drop(df_pedido_estado.order_id)

df_pedido_estado_mvp.count()

df_pedido_estado_mvp.show()

In [None]:
# clean up df_geolocal: number the rows inside each zip-code prefix and keep only the first
# NOTE(review): the window is ordered only by geolocation_state, so which row ranks first
# among rows of the same state is presumably arbitrary — confirm that any row is acceptable.
df_geolocal_mvp = df_geolocal.withColumn('rk', F.row_number().over(Window.partitionBy('geolocation_zip_code_prefix')\
                                                             .orderBy('geolocation_state')))
df_geolocal_mvp = df_geolocal_mvp.filter(df_geolocal_mvp.rk <= 1)

In [None]:
# join with the geolocation info
# The condition references df_geolocal_mvp (the frame actually being joined)
# rather than its parent df_geolocal.
df_pedido_geoloc_mpv = df_pedido_estado_mvp.join(
    df_geolocal_mvp,
    df_pedido_estado_mvp.customer_zip_code_prefix == df_geolocal_mvp.geolocation_zip_code_prefix
)

df_pedido_geoloc_mpv.count()

In [None]:
# Export the joined MVP data as a project asset for Tableau.
# overwrite=True keeps the cell re-runnable once the asset already exists.
pandas_df = df_pedido_geoloc_mpv.toPandas()
project.save_data("dados_tableau.csv", pandas_df.to_csv(index=False), overwrite=True)

In [None]:
# This cell saves the same asset as the previous one; overwrite=True is needed
# because the file already exists by the time it runs.
project.save_data("dados_tableau.csv", pandas_df.to_csv(index=False), overwrite=True)

## Analise Exploratoria

In [10]:
customers = df_cliente.toPandas()
sellers = df_vendedor.toPandas()
geolocation = df_geolocal.toPandas()
orders_items = df_item_pedido.toPandas()
orders = df_pedido.toPandas()
products = df_produto.toPandas()
reviews = df_avaliacao.toPandas()
payments = df_pagamento.toPandas()
#products = products.iloc[:,:2]

# number of items sold per product category (top 3) per state
# rows are ranked inside each customer_state by qtd_produto, largest first

df_1 = df_qtd_vend_prod_estado.withColumn('rk', F.row_number().over(Window.partitionBy('customer_state')\
                                                             .orderBy(df_qtd_vend_prod_estado.qtd_produto.desc())))

df_1.filter(df_1.rk <= 3).show()


+--------------+---------------------+-----------+---+
|customer_state|product_category_name|qtd_produto| rk|
+--------------+---------------------+-----------+---+
|            SC|        esporte_lazer|        363|  1|
|            SC|     moveis_decoracao|        354|  2|
|            SC|      cama_mesa_banho|        330|  3|
|            RO|         beleza_saude|         25|  1|
|            RO| informatica_acess...|         23|  2|
|            RO|        esporte_lazer|         20|  3|
|            PI|         beleza_saude|         54|  1|
|            PI|   relogios_presentes|         41|  2|
|            PI|            telefonia|         38|  3|
|            AM|         beleza_saude|         20|  1|
|            AM| informatica_acess...|         17|  2|
|            AM|            telefonia|         15|  3|
|            RR|        esporte_lazer|          8|  1|
|            RR|         beleza_saude|          7|  2|
|            RR|     moveis_decoracao|          6|  3|
|         

In [None]:
geolocation.head()

In [None]:
customers.head()

In [None]:
sellers.head()

In [None]:
products.head()

In [None]:
orders_items.head()

In [None]:
orders.head()

In [None]:
payments.head()

In [None]:
reviews.head()

In [None]:
df1 = sellers.merge(orders_items, on='seller_id', how='inner')
df2 = orders.merge(customers, on='customer_id', how='inner')

In [None]:
df3 = df1.merge(df2, on='order_id', how='inner')
df4 = df3.merge(products, on='product_id', how='inner')
df5 = payments.merge(reviews, on='order_id', how='inner')

In [None]:
raw_merge = df5.merge(df4, on='order_id', how='inner')

In [None]:
raw_merge

In [None]:
# Work on a separate frame and store the identifier-like columns as text.
df = raw_merge.astype({
    'payment_sequential': 'str',
    'order_item_id': 'str',
    'seller_zip_code_prefix': 'str',
    'customer_zip_code_prefix': 'str',
})

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
# Make sure the purchase timestamp is a datetime and derive the calendar date.
# dayfirst is not passed: the timestamps are year-first, so a day-first hint is
# wrong for this column.
df['order_purchase_timestamp'] = pd.to_datetime(df['order_purchase_timestamp'])
df['order_date'] = df['order_purchase_timestamp'].dt.date

In [None]:
df

In [None]:
### Consumption distribution: numeric columns summed per customer city
# numeric_only=True restricts the sum to numeric columns; the frame also holds
# text and datetime columns, which cannot be summed meaningfully.
df_consumo_cidade = df.groupby(['customer_city']).sum(numeric_only=True).reset_index()

In [None]:
df_consumo_cidade.head()

In [None]:
df_consumo_estado_qtd = df.groupby(['customer_state']).count().reset_index()[['customer_state','price']]

In [None]:
df_consumo_estado_qtd

In [None]:
### Categorias mais vendidas por região

df_cat_estado = df.groupby(['customer_state','product_category_name','order_date']).count().reset_index()[['order_date','customer_state','product_category_name','price']]
df_cat_estado.rename(columns={'price':'quantidade'}, inplace=True)
#df_cat_estado.set_index('order_date', inplace=True)

In [None]:
df_cat_estado['order_date'] = pd.to_datetime(df_cat_estado['order_date'])

In [None]:
df_cat_sp = df_cat_estado.copy()

In [None]:
df_cat_sp['product_category_name'].unique()

In [None]:
df_pet_sp = df_cat_sp[df_cat_sp['product_category_name'] == 'fashion_roupa_feminina']
sns.lineplot(x='order_date', y='quantidade', data=df_pet_sp)

In [None]:
# Duplicate the date and quantity columns as 'ds' and 'y'
# (presumably the column names Prophet expects — confirm).
df_cat_sp['ds'] = df_cat_sp['order_date']
df_cat_sp['y'] = df_cat_sp['quantidade']

from fbprophet import Prophet
# numeric_only=True: 'ds' is a datetime column and cannot be summed.
grouped = df_cat_sp.groupby(['customer_state','product_category_name','order_date']).sum(numeric_only=True)

In [None]:
grouped

In [None]:
df.info()

In [None]:
df_sales = df.copy()

In [None]:
df_sales['order_date'] = df_sales.order_purchase_timestamp.dt.date
sales_per_purchase_date = df_sales.groupby('order_date', as_index=False).price.sum()

In [None]:
ax = sns.lineplot(x="order_date", y="price", data=sales_per_purchase_date)
ax.set_title('Sales per day')

In [None]:
df_sales['order_purchase_week'] = df_sales.order_purchase_timestamp.dt.to_period('W').astype(str)
sales_per_purchase_month = df_sales.groupby('order_purchase_week', as_index=False).price.sum()
ax = sns.lineplot(x="order_purchase_week", y="price", data=sales_per_purchase_month)
ax.set_title('Sales per week')

In [None]:
### Análise de scores por categorias
mean_score_cat = df.groupby('product_category_name', as_index=False).agg({'review_score': ['count', 'mean']})
mean_score_cat.columns = ['product_category_name', 'count', 'mean']

mean_score_cat = mean_score_cat[mean_score_cat['count'] > 50]
mean_score_cat = mean_score_cat.sort_values(by='mean', ascending=False)
mean_score_cat.head()

In [None]:
plt.figure(figsize=(10,15))
ax = sns.barplot(x="mean", y="product_category_name", data=mean_score_cat)
ax.set_title('Categories Review Score')

In [None]:
### Categorias mais vendidas
order_product=pd.merge(orders_items,products,on='product_id',how='left')
order_product.shape

In [None]:
most_product=order_product.groupby('product_category_name').aggregate({'order_id':'count'}).rename(columns={'order_id':'order_count'}).sort_values(by='order_count',ascending=False).reset_index()
most_product.head()

In [None]:
sns.barplot(x='product_category_name',y='order_count',data=most_product[:10],color="blue")
plt.xlabel("Product Category")
plt.ylabel("Total Number of orders")
plt.title("Most bought product categories")
plt.xticks(rotation='vertical')
plt.show()

In [None]:
### Produtos mais vendidos
total_orders=pd.merge(orders, orders_items)
product_orders=pd.merge(total_orders,products, on="product_id")
product_orders.info()

In [None]:
### Top 10

product_orders['product_id'].value_counts()[:10]

In [None]:
product_orders['product_id_shorten']=product_orders['product_id'].str[-8:]
plt.figure(figsize=(10,10))
sns.countplot(x='product_id_shorten', data=product_orders, palette='gist_earth',
             order=product_orders['product_id_shorten'].value_counts()[:10]\
             .sort_values().index).set_title("Top 10 Products", fontsize=15,
                                             weight='bold')

In [None]:
### Top 10 Vendedores
seller_products = pd.merge(order_product, sellers, on="seller_id")
seller_products.info()

In [None]:
seller_products['seller_id_shorten']=seller_products['seller_id'].str[-6:]

In [None]:
plt.figure(figsize=(10,10))
sns.countplot(x='seller_id_shorten', data=seller_products, palette='gist_earth',
             order=seller_products['seller_id_shorten'].value_counts()[:10]\
             .sort_values().index).set_title("Top 10 Sellers", fontsize=15,
                                             weight='bold')

In [None]:
### Top 10
# value_counts() orders by frequency, so its first ten index labels are the ten
# sellers with the most rows. Taking the index directly avoids relying on the
# column name that reset_index() assigns, which differs between pandas versions.
top10_sellers = pd.Series(
    seller_products['seller_id'].value_counts().index[:10], name='seller'
)

In [None]:
### Análise do top seller
top10_sellers

In [None]:

# Top 20 products of one seller (seller id hardcoded below).
# .copy() makes the filtered frame independent, so the column assignment on the
# next line does not write into a slice of product_orders.
product_orders_s = product_orders[product_orders['seller_id'] == '6560211a19b47992c3666cc44a7e94c0'].copy()
product_orders_s['product_id_shorten']=product_orders_s['product_id'].str[-8:]
plt.figure(figsize=(20,10))
sns.countplot(x='product_id_shorten', data=product_orders_s, palette='gist_earth',
            order=product_orders_s['product_id_shorten'].value_counts()[:20]\
            .sort_values().index).set_title("Top 20 Products of the top seller", fontsize=15,
                                            weight='bold')

In [None]:
### Revenue composition of the top seller # 6560211a19b47992c3666cc44a7e94c0
# .copy() makes the filtered frame independent of df, so columns can be added
# to it later without writing into a slice.
df_top_revenue = df[df['seller_id'] == '6560211a19b47992c3666cc44a7e94c0'].copy()

In [None]:
total_rev = df_top_revenue['payment_value'].sum()

In [None]:
df_top_revenue['Share'] = (df_top_revenue['payment_value']/total_rev)*100

In [None]:
# numeric columns summed per product category (text/date columns are left out)
df_top_revenue.groupby(['product_category_name']).sum(numeric_only=True)

In [None]:
# numeric columns summed per customer (text/date columns are left out)
df_top_revenue.groupby(['customer_id']).sum(numeric_only=True)

## Estudos de comportamento de mercado e suas categorias com algoritmos de anomaly detection


In [None]:
# Correlation heatmap over the numeric (and boolean) columns only; the frame also
# holds text and date columns, which corr() cannot use.
df_new = df.copy()
sns.heatmap(df_new.select_dtypes(include=['number', 'bool']).corr())

In [None]:
### Gathering the 2018 data
# NOTE(review): despite its name, 'year_month' holds a daily period (to_period('D')).
df_new['year_month'] = pd.to_datetime(df_new['order_date'])
df_new['year_month'] = df_new['year_month'].dt.to_period('D')

In [None]:
df_sales = df_new.groupby(['year_month']).agg({
    'review_score': ['mean','median'], 'payment_value':['sum','count'],
    'price':'sum', 'freight_value':'sum', 'payment_installments': ['mean','median']})
df_sales.columns = ['review_score_mean', 'review_score_median', 'payment_value_sum', 'purchases_count', 'price_sum', 'freight_value_sum', 'payment_installments_mean','payment_installments_median']
df_sales = df_sales.reset_index()

In [None]:
df_sales['year_month'] = df_sales['year_month'].astype('str')
df_sales['year_month'] = pd.to_datetime(df_sales['year_month'])
df_sales = df_sales[(df_sales['year_month'] > '2017-01-01') & (df_sales['year_month'] < '2018-01-01')]

In [None]:
# One single-column series per metric, both indexed by date.
df_rev = df_sales[['year_month', 'payment_value_sum']].set_index('year_month')
df_qtd = df_sales[['year_month', 'purchases_count']].set_index('year_month')

In [None]:
from adtk.data import validate_series
from adtk.visualization import plot

In [None]:
### Análise de detecção de anomalias/mudanças de padrões de mercado
from adtk.detector import PersistAD
s = validate_series(df_rev)
persist_ad = PersistAD(c=1.0, side='positive')
anomalies = persist_ad.fit_detect(s)
plot(s, anomaly=anomalies, ts_linewidth=1, ts_markersize=3, anomaly_color='red');

In [None]:
from adtk.detector import PersistAD
s = validate_series(df_qtd)
persist_ad = PersistAD(c=1.0, side='positive')
anomalies = persist_ad.fit_detect(s)
plot(s, anomaly=anomalies, ts_linewidth=1, ts_markersize=3, anomaly_color='red');

In [None]:
### Volatility Shift
from adtk.detector import VolatilityShiftAD
s = validate_series(df_rev)
volatility_shift_ad = VolatilityShiftAD(c=3.0, side='positive', window=10)
anomalies = volatility_shift_ad.fit_detect(s)
plot(s, anomaly=anomalies, anomaly_color='red');

In [None]:
from adtk.detector import VolatilityShiftAD
s = validate_series(df_qtd)
volatility_shift_ad = VolatilityShiftAD(c=3.0, side='positive', window=10)
anomalies = volatility_shift_ad.fit_detect(s)
plot(s, anomaly=anomalies, anomaly_color='red');

In [None]:
from adtk.detector import SeasonalAD
s = validate_series(df_rev)
seasonal_ad = SeasonalAD(c=1.0, side="both")
anomalies = seasonal_ad.fit_detect(s)
plot(s, anomaly=anomalies, ts_markersize=1, anomaly_color='red', anomaly_tag="marker", anomaly_markersize=2);

In [None]:
from adtk.detector import SeasonalAD
s = validate_series(df_qtd)
seasonal_ad = SeasonalAD(c=1.0, side="both")
anomalies = seasonal_ad.fit_detect(s)
plot(s, anomaly=anomalies, ts_markersize=1, anomaly_color='red', anomaly_tag="marker", anomaly_markersize=2);

In [None]:
### Comparing the main product categories with each other
# Top 10 categories by summed payment_value.
# numeric_only=True: the frame holds text, date and period columns that cannot be summed.
df_p1_2018 = df_new.copy()
top_cats = df_p1_2018.groupby(['product_category_name']).sum(numeric_only=True).reset_index().sort_values(by='payment_value', ascending=False)[['product_category_name']][:10]

In [None]:
top_cats

In [None]:
### Anomaly analysis per category (e.g. "cama_mesa_banho", "informatica_acessorios") in the second half of 2017
def get_cat(cat, data=None, start='2017-06-01', end='2018-01-01'):
    """Build the revenue and purchase-count series of one product category.

    Rows of ``data`` whose ``product_category_name`` equals ``str(cat)`` are
    grouped by ``year_month``; for each group ``payment_value`` is summed and
    counted. Only groups strictly after ``start`` and strictly before ``end``
    are kept.

    Parameters
    ----------
    cat : category name to select (converted with ``str``).
    data : frame with ``product_category_name``, ``year_month`` and
        ``payment_value`` columns. Defaults to the notebook-level ``df_new``.
    start, end : exclusive bounds of the date window, anything
        ``pd.Timestamp`` accepts. The defaults reproduce the original window.
        NOTE(review): the heading says "second half of 2017", which would
        start on 2017-07-01 rather than after 2017-06-01 — confirm.

    Returns
    -------
    (df_rev, df_qtd) : two single-column frames indexed by ``year_month``
        (as datetimes), holding ``payment_value_sum`` and ``purchases_count``.
    """
    if data is None:
        data = df_new

    df_cat = data[data['product_category_name'] == str(cat)]

    # only payment_value feeds the two returned series
    d = df_cat.groupby('year_month')['payment_value'].agg(['sum', 'count']).reset_index()
    d.columns = ['year_month', 'payment_value_sum', 'purchases_count']

    # the group key may be a period or a string; normalise it to datetimes
    d['year_month'] = pd.to_datetime(d['year_month'].astype('str'))
    dentro_da_janela = (d['year_month'] > pd.Timestamp(start)) & (d['year_month'] < pd.Timestamp(end))
    d = d[dentro_da_janela]

    df_rev = d[['year_month', 'payment_value_sum']].set_index('year_month')
    df_qtd = d[['year_month', 'purchases_count']].set_index('year_month')

    return df_rev, df_qtd

In [None]:
rev_cmb, qtd_cmb = get_cat('cama_mesa_banho')
rev_inf, qtd_inf = get_cat('informatica_acessorios') 
rev_rep, qtd_rep = get_cat('relogios_presentes')

In [None]:
### Persist AD
# The same positive-side persistence detector is fitted on each category's
# revenue series and the result is plotted under the category's title.
for serie, titulo in (
    (rev_cmb, 'Cama Mesa Banho'),
    (rev_inf, 'Informatica e Acessorios'),
    (rev_rep, 'Relogios e Presentes'),
):
    s = validate_series(serie)
    persist_ad = PersistAD(c=1.0, side='positive')
    anomalies = persist_ad.fit_detect(s)
    plot(s, anomaly=anomalies, ts_linewidth=1, ts_markersize=3, anomaly_color='red')
    plt.title(titulo)

In [None]:
### Level Shift AD
from adtk.detector import LevelShiftAD

# The same two-sided level-shift detector is fitted on each category's revenue
# series. The three copy-pasted stanzas are folded into one loop, and the last
# title now reads 'Relogios e Presentes' like the Persist AD plot.
for serie, titulo in (
    (rev_cmb, 'Cama Mesa Banho'),
    (rev_inf, 'Informatica e Acessorios'),
    (rev_rep, 'Relogios e Presentes'),
):
    s = validate_series(serie)
    level_shift_ad = LevelShiftAD(c=6.0, side='both', window=5)
    anomalies = level_shift_ad.fit_detect(s)
    plot(s, anomaly=anomalies, anomaly_color='red')
    plt.title(titulo)

In [None]:
### Volatility Shift AD
from adtk.detector import VolatilityShiftAD

# The same two-sided volatility-shift detector is fitted on each category's
# revenue series. The three copy-pasted stanzas are folded into one loop, and
# the last title now reads 'Relogios e Presentes' like the Persist AD plot.
for serie, titulo in (
    (rev_cmb, 'Cama Mesa Banho'),
    (rev_inf, 'Informatica e Acessorios'),
    (rev_rep, 'Relogios e Presentes'),
):
    s = validate_series(serie)
    volatility_shift_ad = VolatilityShiftAD(c=6.0, side='both', window=5)
    anomalies = volatility_shift_ad.fit_detect(s)
    plot(s, anomaly=anomalies, anomaly_color='red')
    plt.title(titulo)