In [1]:
import pandas as pd
import numpy as np
import requests
import re
from time import asctime, strftime

import logging
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.types import String
import psycopg2
import pysftp
import paramiko
import io
import sys

import email
import smtplib
from email.mime.text import MIMEText

####
import tpm
###
# SFTP connection options object; presumably handed to pysftp.Connection
# further down in the notebook (not visible in this section).
cnopts = pysftp.CnOpts()
# SECURITY NOTE(review): setting hostkeys to None switches off SSH host-key
# verification, so the remote server's identity is not checked and the
# connection is open to man-in-the-middle attacks. Prefer loading the
# server key from a known_hosts file if one can be provisioned.
cnopts.hostkeys = None 
# Show up to 99 columns when a DataFrame is displayed in the notebook.
pd.set_option("display.max_columns",99)

## Logger

In [3]:
# Configure the root logger exactly once.
# logging.basicConfig() does nothing when the root logger already has a
# handler, so a second call cannot add the format/datefmt afterwards: all
# options have to be passed in the first (and only) call.
# Note: for the same reason, re-running this cell in a live kernel does not
# change the configuration; restart the kernel to apply new settings.
LogFileName = 'LogFile.log'
logging.basicConfig(filename=LogFileName, level=logging.DEBUG,
                    filemode='a', datefmt='%d-%b-%y %H:%M:%S',
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

## Import Econda



In [4]:
###########################################
# Let's start with Econda
###########################################
#
# +++ THE NAME OF THE REPORT +++
# ReportName
#
# +++ A TYPICAL REST QUERY +++
# https://monitor.econda-monitor.de/rest/chartservice?user=User.Name
# &client=1234&pass=xxx&datascope=WEBANALYSIS&GENERATE_TYPE=CSV
# &myreport=hexReportName
# &fromDate=2018-10-29+00%3A00&toDate=2018-11-05+00%3A00

howManyMonths = 3  # how many months back? (a month is counted as 30 days)

# pd.datetime was deprecated in pandas 1.0 and removed in 2.0; pd.Timestamp
# offers the same today()/strftime() interface. "Today" is taken once so the
# start of the window is derived from exactly the same instant.
Today = pd.Timestamp.today()
StartDay = Today - pd.Timedelta(days=howManyMonths*30)

# Compact date stamps (YYYYMMDD); not used in this cell, kept for later cells.
StartDay_string = StartDay.strftime('%Y%m%d')
Today_string = Today.strftime('%Y%m%d')

# The REST service expects "YYYY-MM-DD HH:MM"; 00:00 is appended so that the
# query window starts and ends at midnight.
StartDay_query = StartDay.strftime('%Y-%m-%d 00:00')
Today_query = Today.strftime('%Y-%m-%d 00:00')


### starting requests
url = 'https://monitor.econda-monitor.de/rest/chartservice'
# Despite its name this dict holds the URL query parameters (it is passed as
# `params=` below), not HTTP headers.
# NOTE(review): User.Name and password are not defined in this section; they
# are assumed to be provided elsewhere. The password travels in the query
# string, so it can end up in proxy/server logs.
headers = {'user': User.Name,
           'client': '1234',
           'pass': password, 'datascope':'WEBANALYSIS',
           'GENERATE_TYPE':'CSV',
           'myreport':'hexReportName','fromDate':StartDay_query,'toDate':Today_query}

# Reset first so that a failed request cannot leave a response object from a
# previous run of this cell behind to be parsed below.
r = None
try:
    r = requests.get(url=url, params=headers)
    # str(r) is required: concatenating the Response object itself raises a
    # TypeError, which used to make every successful request look like a
    # failure in the log.
    logging.debug(strftime('%d.%m.%Y %H:%M:%S')+'requests: '+str(r))
except Exception:
    # logging.exception records the traceback, so the cause is not lost.
    logging.exception(strftime('%d.%m.%Y %H:%M:%S')+'oops, problems with requests')

try:
    # The export is a German-style CSV: ';' separated, ',' as decimal mark and
    # '.' as thousands separator. If the request above failed, r is None and
    # the resulting AttributeError is logged here as well.
    dataEcoRaw = pd.read_csv(io.StringIO(r.text),sep=';',decimal=',',thousands='.',encoding='utf_8', dtype={'Umsatz': 'float'})
    logging.debug(strftime('%d.%m.%Y %H:%M:%S')+' DataFrame dataEco created')
except Exception:
    logging.exception(strftime('%d.%m.%Y %H:%M:%S')+'oops, problems with the datafraem creation')
    

In [5]:
try:
    # Columns of the raw Econda export that the rest of the notebook uses.
    whichColsEco = [
        'Newsletter Kampagne',
        'Land',
        'Sprache',
        'Produkte - Stückzahl verkauft',
        'Umsatz',
        'Besuche (unique)',
        'Besucher (unique)',
        'Kunden',
        'Bestellungen',
        'Bounces',
    ]
    # Replace the umlaut in the column name with its ASCII transliteration.
    asciiColumnNames = {'Produkte - Stückzahl verkauft':'Produkte - Stueckzahl verkauft'}
    dataEco = dataEcoRaw[whichColsEco].rename(columns=asciiColumnNames)

    # Campaign identifiers are compared case-insensitively from here on.
    campaignLower = dataEco['Newsletter Kampagne'].str.lower()
    dataEco['Newsletter Kampagne'] = campaignLower

    # The company is the leading token of the campaign string. For
    # companyName3 the campaign starts with "newsletter" instead of the
    # company name, hence the replacement afterwards.
    # NOTE(review): the match runs on the lower-cased string, so the patterns
    # must be lower case too; the placeholder names shown here contain an
    # upper-case "N" - confirm the real patterns are all lower case.
    companyPrefix = campaignLower.str.extract(
        '(^companyName1|^companyName2|^newsletter|^companyName4)',
        expand=False,
    )
    dataEco['Firma'] = companyPrefix.replace({'newsletter':'companyName3'})

    # Econda reports Swedish as "sv"; it is normalised to "se" so that the
    # language code agrees with the other data source it is joined with later
    # (presumably Salesforce - confirm which code that side uses).
    dataEco['Sprache'] = dataEco['Sprache'].replace({'sv':'se'})
except Exception:
    logging.exception("Problems Econda data cleaning")

In [6]:
# Sanity check: display the dtype of every column of the cleaned Econda frame.
dataEco.dtypes

Newsletter Kampagne                object
Land                               object
Sprache                            object
Produkte - Stueckzahl verkauft      int64
Umsatz                            float64
Besuche (unique)                    int64
Besucher (unique)                   int64
Kunden                              int64
Bestellungen                        int64
Bounces                             int64
Firma                              object
dtype: object

In [9]:
# Not every company has the campaign name at the same position in the campaign string. The helpers below pick a field by position to handle this.

def MySelection2(x):
    """Return the third element (index 2) of a split campaign string.

    Parameters
    ----------
    x : sequence or missing value
        Normally the list produced by ``Series.str.split``. For a missing
        campaign string pandas yields NaN (a float) instead of a list.

    Returns
    -------
    object
        ``x[2]`` when it exists, otherwise ``np.nan``.
    """
    try:
        return x[2]
    except (IndexError, TypeError):
        # IndexError: fewer than three fields.
        # TypeError: x is not subscriptable (NaN/None for a missing value);
        # without this a single missing campaign aborts the whole apply().
        return np.nan

def MySelection3(x):
    """Return the fourth element (index 3) of a split campaign string.

    Parameters
    ----------
    x : sequence or missing value
        Normally the list produced by ``Series.str.split``. For a missing
        campaign string pandas yields NaN (a float) instead of a list.

    Returns
    -------
    object
        ``x[3]`` when it exists, otherwise ``np.nan``.
    """
    try:
        return x[3]
    except (IndexError, TypeError):
        # IndexError: fewer than four fields.
        # TypeError: x is not subscriptable (NaN/None for a missing value);
        # without this a single missing campaign aborts the whole apply().
        return np.nan
    
def MySelection4(x):
    """Return the fifth element (index 4) of a split campaign string.

    Parameters
    ----------
    x : sequence or missing value
        Normally the list produced by ``Series.str.split``. For a missing
        campaign string pandas yields NaN (a float) instead of a list.

    Returns
    -------
    object
        ``x[4]`` when it exists, otherwise ``np.nan``.
    """
    try:
        return x[4]
    except (IndexError, TypeError):
        # IndexError: fewer than five fields.
        # TypeError: x is not subscriptable (NaN/None for a missing value);
        # without this a single missing campaign aborts the whole apply().
        return np.nan

try:
    # Split the campaign string into its path-like fields once and reuse the
    # result for all three positions (the separator is "/" or its
    # URL-encoded form "%2F").
    campaignFields = dataEco['Newsletter Kampagne'].str.split(r'%2F|/|//')
    dataEco['ThirdField'] = campaignFields.apply(MySelection2)
    dataEco['FourthField'] = campaignFields.apply(MySelection3)
    dataEco['FifthField'] = campaignFields.apply(MySelection4)

    # remove entries for which there is no clear corresponding Campaign name.
    # .copy() makes the filtered frame independent of the original, so the
    # column assignments below do not trigger SettingWithCopyWarning.
    hasCampaign = dataEco['FifthField'].notnull() & dataEco['FourthField'].notnull()
    dataEco = dataEco.loc[hasCampaign, :].copy()

    # companyName3 campaigns carry one extra leading field, so every field of
    # interest sits one position further to the right for that company.
    # np.where is used instead of a row-wise apply(): it gives the same
    # values and also works when the filtered frame is empty.
    isCompany3 = dataEco['Firma'] == 'companyName3'

    # get KampagnenName taking into account the fancy setup of companyName3
    dataEco['KampagnenName'] = np.where(isCompany3, dataEco['FifthField'], dataEco['FourthField'])

    # get Kategorie taking into account the fancy setup of companyName3
    # Kategorie is always the field before the KampagnenName
    dataEco['Kategorie'] = np.where(isCompany3, dataEco['FourthField'], dataEco['ThirdField'])

    # Map the company name to its code (kept as a string).
    dataEco['FirmaCode'] = dataEco.Firma.replace({'companyName1':'100', 'companyName2':'470', 'companyName3':'460', 'companyName4':'400'})
    dataEco['KampagnenName'] = dataEco['KampagnenName'].str.lower()
    # Placeholder columns, filled with NaN here.
    dataEco['Besuche'] = np.nan
    dataEco['Besucher'] = np.nan
except Exception:
    logging.exception("Problems Econda data cleaning 2")

In [13]:
# Aggregate all rows that belong to the same
# ('FirmaCode','KampagnenName','Land','Sprache') tuple, so that variants of
# one campaign (different teasers etc.) are summed into a single row.
#
# Kategorie is a text column: it cannot be summed and is therefore not part
# of dataEco_toJoin_a. To carry it along, the same groupby is repeated for
# Kategorie alone, taking the minimum as the representative value (max or any
# other pick would do as well), and both results are merged on the group keys.
try:
    econdaGroupKeys = ['FirmaCode','KampagnenName','Land','Sprache']
    econdaGrouped = dataEco.groupby(econdaGroupKeys)

    # numeric_only=True is spelled out on purpose: older pandas dropped
    # non-numeric columns from sum() silently, while pandas >= 2.0 would
    # concatenate the strings (including Kategorie, which would then clash
    # with the Kategorie column merged in below).
    dataEco_toJoin_a = econdaGrouped.sum(numeric_only=True).reset_index()
    dataEco_toJoin_b = econdaGrouped['Kategorie'].min().reset_index()
    # I join the two above to get the final part to be further joined with the results from SFMC
    dataEco_toJoin = pd.merge(dataEco_toJoin_a,dataEco_toJoin_b,on=econdaGroupKeys)
except Exception:
    logging.exception("Problems joining Econda data")