In [1]:
import glob
import json
import os
import datetime

import pandas as pd
pd.set_option('display.max_rows', 100)
import plotly.express as px

In [2]:
DATA_PATH = "../vwkommi"

json_files = glob.glob(f"{DATA_PATH}/*.json")

In [3]:
def load_file(path):
    unique_specs = {}
    file_name = os.path.basename(os.path.splitext(path)[0])
    date = file_name.split("_")[-1].split("T")[0]

    cars = []

    with open(path, "r") as f:
        json_string = json.loads(f.read())

    counter = 0
    for komm_num, elems in json_string.items():
        counter += 1
        car = {
            "Datum": datetime.datetime.strptime(date, "%Y-%m-%d"),
            "Kommissionsnummer": komm_num,
            "Kommissionsbereich": komm_num[:2],
            "FIN": "",
            "Modell": "",
            "Farbcode": "",
            "Farbe": "",
            "Motor": "",
            "Modelljahr": "",
            "Ausstattung": "",
        }

        num_elems = len(elems)
        for num, elem in zip(range(num_elems), elems):
            if num == 0:
                try:
                    car['FIN'] = elem["vin"]
                except KeyError:
                    car['FIN'] = ""
                
                car['Modell'] = elem["modelName"].split(" ")[0]
                car['Variante'] = ' '.join(elem["modelName"].split(" ")[1:])
                car['Farbcode'] = elem["exteriorColor"]

            if num == 1:
                car['Motor']  = elem["engine"]
                car['Modelljahr']  = elem["modelYear"]
                try:
                    car['Farbe'] = elem["exteriorColorText"]
                except KeyError:
                    car['Farbe'] = ""

                try:
                    specifications = elem["specifications"]
                    
                    specs = []
                    for spec in specifications:
                        specs.append(spec["codeText"])

                        remember_spec = unique_specs.get(spec["codeText"], 0)
                        unique_specs[spec["codeText"]] = remember_spec + 1


                    car['Ausstattung'] = specs
                except KeyError:
                    car['Ausstattung'] = ""

        cars.append(car)

    df = pd.DataFrame(cars)

    #df = df.explode('Ausstattung')
    return df, unique_specs, counter


In [4]:
dfs = []
unique_specs = {}
counter = 0
num_files = len(json_files)
for file, num in zip(json_files, range(len(json_files))):
    print(f"Parsing {file} ({num}/{num_files})")
    _df, _unique_specs, _counter = load_file(file)
    dfs.append(_df)

    for spec, spec_count in _unique_specs.items():
        c = unique_specs.get(spec, 0)
        unique_specs[spec] = c + spec_count

    counter += _counter

_df = pd.concat(dfs)
df = _df[(_df["Modell"] != "") & (_df["Variante"] != "") & ~(_df["Variante"].isna())]

Parsing ../vwkommi/output_AJ_0-9999_2022-04-30T11.13.34.json (0/102)
Parsing ../vwkommi/output_AF_5000-9999_2022-04-09T10.57.49.json (1/102)
Parsing ../vwkommi/output_AF_5000-9999_2022-03-29T17.44.56.json (2/102)
Parsing ../vwkommi/output_AJ_0-9999_2022-03-05T15.23.25.json (3/102)
Parsing ../vwkommi/output_AJ_0-9999_2022-04-23T10.54.17.json (4/102)
Parsing ../vwkommi/output_AI_0-9999_2022-05-15T10.52.21.json (5/102)
Parsing ../vwkommi/output_AJ_0-9999_2022-05-07T09.48.18.json (6/102)
Parsing ../vwkommi/output_AH_0-9999_2022-06-01T19.11.16.json (7/102)
Parsing ../vwkommi/output_AI_0-9999_2022-03-26T11.27.59.json (8/102)
Parsing ../vwkommi/output_AK_0-9999_2022-04-16T10.27.07.json (9/102)
Parsing ../vwkommi/output_AF_5000-9999_2022-02-20T09.09.46.json (10/102)
Parsing ../vwkommi/output_AL_0-9999_2022-05-15T10.52.21.json (11/102)
Parsing ../vwkommi/output_AG_0-9999_2022-03-19T10.44.38.json (12/102)
Parsing ../vwkommi/output_AJ_0-9999_2022-02-27T20.45.14.json (13/102)
Parsing ../vwkommi/ou

In [5]:
model_df = df[['Datum', 'Modell', 'Kommissionsnummer']].groupby(['Datum', 'Modell']).nunique().reset_index()

In [6]:
fig = px.line(
    model_df,
    x="Datum",
    y="Kommissionsnummer",
    color="Modell",
    title='Anzahl Fahrzeuge Pro Woche nach Modell',
    markers=True,
    labels={
        "Kommissionsnummer": "Anzahl",
    }
)

fig.write_html("../html/images/model_count.html", include_plotlyjs="cdn", full_html=False)

In [7]:
varianten_df = df[['Datum', 'Modell', "Variante", 'Kommissionsnummer']].groupby(['Datum', 'Modell', "Variante"]).nunique().reset_index()

In [8]:
fig = px.line(
    varianten_df,
    x="Datum",
    y="Kommissionsnummer",
    color="Variante",
    title='Anzahl Fahrzeuge Pro Woche nach Variante',
    facet_col="Modell",
    markers=True,
    labels={
        "Kommissionsnummer": "Anzahl",
    }
)

fig.write_html("../html/images/variants_count.html", include_plotlyjs="cdn", full_html=False)

In [9]:
kommissionsbereich_df = df[['Datum', 'Modell', "Kommissionsbereich", 'Kommissionsnummer']].groupby(['Datum', 'Modell', "Kommissionsbereich"]).nunique().reset_index()

In [10]:
fig = px.line(
    kommissionsbereich_df,
    x="Datum",
    y="Kommissionsnummer",
    color="Kommissionsbereich",
    title='Anzahl Fahrzeuge Pro Woche nach Kommisionsnummerbereich',
    facet_col="Modell",
    markers=True,
    labels={
        "Kommissionsnummer": "Anzahl",
    }
)

fig.write_html("../html/images/kommbereich_count.html", include_plotlyjs="cdn", full_html=False)

In [11]:
fin_df = df[['Datum', 'Modell', 'FIN']].groupby(['Datum', 'Modell']).nunique().reset_index()

In [12]:
fig = px.line(
    fin_df,
    x="Datum",
    y="FIN",
    color="Modell",
    title='Anzahl FINs Pro Woche nach Modell',
    markers=True,
    labels={
        "FIN": "Anzahl",
    }
)

fig.write_html("../html/images/fin_count.html", include_plotlyjs="cdn", full_html=False)

In [13]:
fin_varianten_df = df[['Datum', 'Modell', "Variante", 'FIN']].groupby(['Datum', 'Modell', "Variante"]).nunique().reset_index()

In [14]:
fig = px.line(
    fin_varianten_df,
    x="Datum",
    y="FIN",
    color="Variante",
    title='Anzahl FINs Pro Woche nach Variante',
    facet_col="Modell",
    markers=True,
    labels={
        "FIN": "Anzahl",
    }
)

fig.write_html("../html/images/fin_variants_count.html", include_plotlyjs="cdn", full_html=False)

In [15]:
bereich_varianten_df = df[['Datum', 'Modell', "Kommissionsbereich", 'FIN']].groupby(['Datum', 'Modell', "Kommissionsbereich"]).nunique().reset_index()

In [16]:
fig = px.line(
    bereich_varianten_df,
    x="Datum",
    y="FIN",
    color="Kommissionsbereich",
    title='Anzahl FINs Pro Woche nach Kommisionsnummerbereich',
    facet_col="Modell",
    markers=True,
    labels={
        "FIN": "Anzahl",
    }
)

fig.write_html("../html/images/komm_range_count.html", include_plotlyjs="cdn", full_html=False)