# Qualtrics integration and pipeline
Demonstration of how we generate archetypes from the *Qualtrics* responses, both for the current car and for each vintage (i.e. an archetype defined by `vehicle_type` and `fuel_type`).

In [2]:
##### logging configuration #####
## reference for the logging module (kept as a bare, unused string expression)
"https://docs.python.org/3/howto/logging.html"
import logging

## every record carries timestamp, level and calling function name;
## records of level INFO and above are written to logging.log
logging.basicConfig(
    filename='logging.log',
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(funcName)s %(message)s',
)

## Qualtrics

In [3]:
import os

from QualtricsAPI.Setup import Credentials

from QualtricsAPI import Responses


In [None]:
## Read the Qualtrics credentials from the environment.
## Indexing os.environ (instead of .get) fails immediately with a KeyError that
## names the missing variable, rather than an AttributeError on None later on.
TOKEN = os.environ["QUALTRICS_API_KEY"]
## the data center is taken as the first dot-separated label of the base URL
DATA_CENTER = os.environ["QUALTRICS_BASE_URL"].split(".")[0]


In [None]:
## hand the token and data center read from the environment to QualtricsAPI
Credentials().qualtrics_api_credentials(
    token=TOKEN,
    data_center=DATA_CENTER,
)


In [None]:
## fetch the responses of the pre-study survey
## (the survey id is defined once and reused instead of repeating the literal)
survey_id = "SV_6u4YzY24OAfhWfQ"
r = Responses()
df_pre_study = r.get_survey_responses(survey_id)


In [None]:
## preview the downloaded survey responses
df_pre_study.head()


## Workflow

In [4]:
import api.scrape as tcs

from api.helper import Car


In [None]:
## vehicle classes exposed by Car (used below to build every class x fuel combination)
Car.vehicle_classes


In [None]:
## fuel types exposed by Car (used below to build every class x fuel combination)
Car.fuel_types


In [6]:
## example car: vehicle type "Mittelklasse", fuel type "Elektro", fuel consumption 15
car = Car("Mittelklasse", "Elektro", 15)
print(car)


vehicle_type:     Mittelklasse
fuel_type:        Elektro
fuel_consumption: 15


In [7]:
## look up cars similar to `car` for 20'000 km and canton ZH;
## with verbose=True each extracted model is printed
## NOTE(review): the exact meaning of similar["buffer"] is defined in api.scrape (not shown here)
similar_cars = tcs.get_cars(
    car, km=20e3, canton="ZH", similar={"flag": True, "buffer": 1}, verbose=True
)


Extracted Tesla Model 3 ­
Extracted Tesla Model 3 Long Range


In [8]:
## number of similar cars returned
len(similar_cars)


2

In [9]:
def remove_unit(string):
    """Strips the unit from a string like 17'000 CHF/year -> float(17e3)

    Only the first whitespace-separated token is parsed, so a missing unit,
    repeated or non-breaking spaces and units that contain spaces themselves
    (e.g. "0.59 CHF / km") are all accepted.

    Args:
        string (str): number, optionally followed by whitespace and a unit;
            apostrophes used as thousands separators are removed

    Returns:
        float: the numeric part of the string

    Raises:
        ValueError: if the string is empty or its first token is not a number
    """
    tokens = string.split(maxsplit=1)
    if not tokens:
        raise ValueError("cannot extract a number from {!r}".format(string))
    ## drop thousands separators (ASCII and typographic apostrophe)
    number = tokens[0].replace("'", "").replace("\u2019", "")
    return float(number)


In [19]:
import pandas as pd


def generate_archetype(similar_cars, ndigits=2):
    """Generates an archetypical car by averaging a list of similar cars

    Args:
        similar_cars (list of dict): returned by get_cars(); every car needs a
            "costs" dict with the keys "Fixe Kosten", "Variable Kosten" and
            "Kilometerkosten"; the range is read from
            car["specs"]["Reichweite (NEFZ/WLTP)"] when available
        ndigits (int): number of decimals the averages are rounded to

    Returns:
        dict: rounded means for "fix_cost", "variable_cost", "cost_per_km" and
            "reach"; "reach" is NaN when none of the cars provides a usable range
    """
    car_attributes = []
    for c in similar_cars:
        costs = c["costs"]
        attrs = {
            "fix_cost": remove_unit(costs["Fixe Kosten"]),
            "variable_cost": remove_unit(costs["Variable Kosten"]),
            "cost_per_km": remove_unit(costs["Kilometerkosten"]),
        }

        try:
            ## the spec holds two " / "-separated values; the second one is used
            ## (presumably the WLTP figure, going by the key name -- confirm)
            reach = float(c["specs"]["Reichweite (NEFZ/WLTP)"].split(" / ")[1])
        except (KeyError, IndexError, ValueError, TypeError, AttributeError):
            ## missing or malformed range: NaN (not None) keeps the column
            ## numeric, so mean() skips it and still works when every car lacks one
            reach = float("nan")

        attrs["reach"] = reach
        car_attributes.append(attrs)

    car_attributes = pd.DataFrame(car_attributes)

    archetype = dict(round(car_attributes.mean(), ndigits=ndigits))

    return archetype


In [18]:
## second " / "-separated entry of the first car's range spec (returned as a string)
similar_cars[0]["specs"]["Reichweite (NEFZ/WLTP)"].split(" / ")[1]

'491'

In [20]:
## average the similar cars found above into a single archetype and display it
archetype = generate_archetype(similar_cars)
archetype


{'fix_cost': 7520.0,
 'variable_cost': 4274.0,
 'cost_per_km': 0.59,
 'reach': 547.5}

## Pipeline

### User specific archetype

In [None]:
## sample respondents, one row each, with the variables get_cars needs
df_qualt = pd.DataFrame(
    [
        ## fuel_consumption is in kWh/100km if the car is electric
        ("SUV S", "Elektro", 14, 15e3, "VD"),
        ("Luxusklasse", "Benzin", 7, 20e3, "AI"),
        ("Mittelklasse", "Benzin", 2, 22e3, "ZG"),
    ],
    columns=["vehicle_type", "fuel_type", "fuel_consumption", "km", "canton"],
)

df_qualt.head()


In [21]:
def helper(row, max_buffer=5, verbose=True, headless=True):
    """Finds cars similar to the one described by `row`, widening the buffer on failure

    get_cars() is tried with similar["buffer"] = 1, 2, ..., max_buffer and the
    first successful result is returned.

    Args:
        row (mapping): needs the keys "vehicle_type", "fuel_type",
            "fuel_consumption", "km" and "canton" (e.g. a DataFrame row)
        max_buffer (int, optional): largest buffer that is tried. Defaults to 5.
        verbose (bool, optional): print progress and pass verbose on to get_cars. Defaults to True.
        headless (bool, optional): passed on to get_cars. Defaults to True.

    Returns:
        whatever get_cars() returns, or None if every buffer up to max_buffer failed
    """
    car = Car(row["vehicle_type"], row["fuel_type"], row["fuel_consumption"])
    if verbose:
        print("---\n{}\n---".format(car))
    for buffer in range(1, max_buffer + 1):
        try:
            ## keyword arguments, like the other get_cars calls in this notebook,
            ## so the call does not depend on the positional order of km/canton
            return tcs.get_cars(
                car_object=car,
                km=row["km"],
                canton=row["canton"],
                similar={"flag": True, "buffer": buffer},
                verbose=verbose,
                headless=headless,
            )
        except Exception as err:
            ## deliberately broad (best effort), but the failure is recorded
            logging.warning(
                "get_cars failed with buffer %s for %s x %s: %r",
                buffer,
                row["vehicle_type"],
                row["fuel_type"],
                err,
            )
            ## only announce a retry if there actually is one
            if verbose and buffer < max_buffer:
                print("increasing buffer to {}".format(buffer + 1))
    return None


In [None]:
## look up similar cars for every row (helper is applied row-wise with its defaults)
df_qualt["similar_cars"] = df_qualt.apply(helper, axis=1)


In [None]:
## generate archetypes
## helper() returns None when no similar cars could be found; such rows get a
## None archetype instead of crashing generate_archetype
df_qualt["archetype"] = df_qualt.apply(
    lambda row: None
    if row["similar_cars"] is None
    else generate_archetype(row["similar_cars"]),
    axis=1,
)
df_qualt["archetype"]


In [None]:
## unpack the archetype dicts into flat archetype_<key> columns;
## rows with a missing or empty archetype get NaN instead of raising
df_qualt = df_qualt.assign(
    **{
        "archetype_" + key: [
            (arch or {}).get(key, float("nan")) for arch in df_qualt["archetype"]
        ]
        for key in ("fix_cost", "variable_cost", "cost_per_km")
    }
)


### Generic archetypes

In [22]:
import numpy as np
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from api.helper import NoSimilarCar
import pickle

## pack everything into a function
def generate_generic_archetypes(
    km=15e3, canton="ZH", verbose=True, cars_path="../../data/cars"
):
    """Generate archetype for each vehicle_class x fuel_type combination

    Args:
        km (int, optional): archetypes' annual mileage (reference). Defaults to 15e3.
        canton (str, optional): archetypes' domicile (reference). Defaults to 'ZH'.
        verbose (bool, optional): print each combination and pass verbose on to get_cars. Defaults to True.
        cars_path (str, optional): file the raw get_cars results are pickled to. Defaults to '../../data/cars'.

    Returns:
        pandas.DataFrame: containing vehicle_class, fuel_type, car_objects, as well as the
            generic_archetypes (as returned by generate_archetype; None for combinations
            where get_cars raised one of the handled exceptions)
    """
    ## generate df of all possible combinations
    archs = np.stack(np.meshgrid(Car.vehicle_classes, Car.fuel_types), axis=-1).reshape(
        -1, 2
    )
    df_archs = pd.DataFrame(archs)
    df_archs.columns = ["vehicle_class", "fuel_type"]

    df_archs["car_objects"] = df_archs.apply(
        lambda row: Car(row["vehicle_class"], row["fuel_type"], fuel_consumption=None),
        axis=1,
    )

    cars = []
    for c in df_archs["car_objects"]:
        if verbose:
            print("---\n{}\n---".format(c))
        try:
            car = tcs.get_cars(
                car_object=c,
                canton=canton,
                km=km,
                similar={"flag": False},
                headless=True,
                verbose=verbose,
            )
        except (NoSimilarCar, NoSuchElementException, StaleElementReferenceException) as e:
            ## keep going, but record which combination failed and why
            logging.warning(
                "Exception for %s x %s: %r", c.vehicle_class, c.fuel_type, e
            )
            car = None
        cars.append(car)

    ## save the raw results so the archetypes can be rebuilt without scraping again
    with open(cars_path, "wb") as fp:
        pickle.dump(cars, fp)

    ## each entry of cars is a list of similar cars (or None if the lookup failed)
    df_archs["generic_archetypes"] = [
        None if c is None else generate_archetype(c) for c in cars
    ]

    return df_archs


In [None]:
## build the generic archetypes with the default reference mileage and canton
## (one get_cars call per vehicle_class x fuel_type combination)
generic_archs = generate_generic_archetypes()

In [23]:
"""This cell can be run if you need to construct generic_archetpyes

from cell above with hel of data/cars dump...
"""


## generate df of all possible combinations
archs = np.stack(np.meshgrid(Car.vehicle_classes, Car.fuel_types), axis=-1).reshape(
    -1, 2
)
df_archs = pd.DataFrame(archs)
df_archs.columns = ["vehicle_class", "fuel_type"]

df_archs["car_objects"] = df_archs.apply(
    lambda row: Car(row["vehicle_class"], row["fuel_type"], fuel_consumption=None),
    axis=1,
)

## save
with open("../../data/cars", "rb") as fp:
    cars = pickle.load(fp)

generic_archetypes = []
for c in cars:  ## is list of similar cars
    if c is None:
        generic_archetypes.append(None)
        continue
    ga = generate_archetype(c)
    generic_archetypes.append(ga)

df_archs["generic_archetypes"] = generic_archetypes

In [24]:
## persist the archetype table rebuilt from the data/cars dump
with open("../../data/ga", "wb") as fp:
    pickle.dump(df_archs, fp)

In [None]:
## persist the freshly scraped archetype table;
## requires generic_archs, i.e. the generate_generic_archetypes() cell must have been run
with open("../../data/generic_archs", "wb") as fp:
    pickle.dump(generic_archs, fp)

## Remarks
- For electric cars (`fuel_type='Elektro'`), `fuel_consumption` is given in kWh/100km, so the values are higher than for combustion cars; see **Normverbrauch** at https://www.verbrauchskatalog.ch/index.php