# Partnervertrag Analyse: Datenpräparation

In [28]:
%load_ext autoreload
%autoreload

import pandas as pd
import qgrid

from pa_lib.file  import store_bin
from pa_lib.data  import desc_col, as_dtype, as_date, split_date_iso
from pa_lib.util  import obj_size
from pa_lib.types import dtFactor
from pa_lib.sql   import query
from pa_lib.ora   import Connection
from pa_lib.log   import info

# Raise the per-cell display limit to 200 characters so long text columns
# are not cut off in rendered DataFrames.
pd.set_option('display.max_colwidth', 200)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Daten einlesen

In [29]:
# Fetch the query registered under the name 'pv_2'
# (presumably SQL text managed by pa_lib.sql — confirm).
pv_query = query('pv_2')

In [30]:
# Run the PV query over the 'APC_PROD_VDWH1' connection and log start and finish.
info('Starting PV query on APC Prod instance')
# The connection is only needed for this one call
# (presumably released when the with-block exits — confirm).
with Connection('APC_PROD_VDWH1') as c:
    pv_data_raw = c.long_query(pv_query)
# Report memory size and shape of the returned frame.
info(f'Finished PV query, returned {obj_size(pv_data_raw)} of data: {pv_data_raw.shape}')

2019-05-20 13:20:44 [INFO] Starting PV query on APC Prod instance
2019-05-20 13:21:22 [INFO] Finished query in 37.21s (10.12s CPU)
2019-05-20 13:21:24 [INFO] Finished PV query, returned 344.6 MB of data: (959412, 11)


In [31]:
# Peek at the first rows of the raw query result.
pv_data_raw.head()

Unnamed: 0,PV_NR,JAHR_KW,JAHR,KW,RES_BRUTTO,RES_NETTO,AUS_BRUTTO,AUS_NETTO,PV_TITEL,PARTNER_NR,PARTNER
0,16278,201641,2016,41,9891.0,9613.25,2952.75,2095.048909,Prilly - Route de Renens\nPassage piétons [16278],130811,Centre Intercommunal de Glace de Malley SA [130811]
1,13414,201443,2014,43,1304.5,330.232953,1355.905038,969.973317,Heimberg - Gemeindegebiet (öffentlicher Grund) [13414],103797,Einwohnergemeinde Heimberg [103797]
2,24975,201533,2015,33,2407.0,2407.0,2524.637363,2450.832005,"Zug, Baudirektion des Kantons Zug, Plakatvertrag\n [24975]",105637,Baudirektion des Kantons Zug [105637]
3,11565,201628,2016,28,0.0,0.0,1294.0,1292.6,"div. Stellen Kernzone, Bern [11565]",144093,Tiefbauamt der Stadt Bern [144093]
4,1151,201644,2016,44,0.0,0.0,192.0,66.883729,"Hauptstrasse 139, 5615 Fahrwangen [1151]",106465,Rodel Walter [106465]


In [32]:
# Per-column overview of the raw frame: dtype, null count, number of unique values.
desc_col(pv_data_raw)

Unnamed: 0,DTYPE,NULLS,UNIQUE
PV_NR,int64,0/959412,4210
JAHR_KW,int64,0/959412,383
JAHR,object,0/959412,8
KW,object,0/959412,53
RES_BRUTTO,float64,0/959412,64092
RES_NETTO,float64,0/959412,187229
AUS_BRUTTO,float64,0/959412,143078
AUS_NETTO,float64,0/959412,325612
PV_TITEL,object,0/959412,4210
PARTNER_NR,int64,0/959412,3176


## Leerwerte bereinigen, Datentypen korrigieren

In [33]:
# Drop every row that has a missing value in any column.
pv_data_raw = pv_data_raw.dropna(how='any')

In [34]:
# Memory footprint and shape after the null filter.
(obj_size(pv_data_raw), pv_data_raw.shape)

('373.7 MB', (959412, 11))

In [35]:
# Convert the object-dtype columns to dtFactor to shrink the frame in memory
# (dtFactor is presumably pa_lib's categorical dtype — confirm).
pv_data_raw = as_dtype(pv_data_raw, dtFactor, incl_dtype='object')

In [36]:
# Memory footprint and shape after the dtype conversion.
(obj_size(pv_data_raw), pv_data_raw.shape)

('65.2 MB', (959412, 11))

In [37]:
# Detailed per-column overview; det=True adds memory use and value range.
desc_col(pv_data_raw, det=True)

Unnamed: 0,DTYPE,NULLS,UNIQUE,MEM,RANGE
PV_NR,int64,0/959412,4210,14.6 MB,"[28,307931]"
JAHR_KW,int64,0/959412,383,14.6 MB,"[201401,202117]"
JAHR,category,0/959412,8,8.2 MB,"[2014,2021]"
KW,category,0/959412,53,8.2 MB,"[01,53]"
RES_BRUTTO,float64,0/959412,64092,14.6 MB,"[0.0,1818913.5130294955]"
RES_NETTO,float64,0/959412,187229,14.6 MB,"[-7.14216149067,1107719.8292558126]"
AUS_BRUTTO,float64,0/959412,143078,14.6 MB,"[0.0,429794.512849719]"
AUS_NETTO,float64,0/959412,325612,14.6 MB,"[-1.185886821269,285173.55827892455]"
PV_TITEL,category,0/959412,4210,9.8 MB,"[ 1800 Vevey - Rue du Panorama 12 [24493],öffentlicher Grund [12466]]"
PARTNER_NR,int64,0/959412,3176,14.6 MB,"[100035,655161]"


In [38]:
# Peek at the first rows again after the dtype conversion.
pv_data_raw.head()

Unnamed: 0,PV_NR,JAHR_KW,JAHR,KW,RES_BRUTTO,RES_NETTO,AUS_BRUTTO,AUS_NETTO,PV_TITEL,PARTNER_NR,PARTNER
0,16278,201641,2016,41,9891.0,9613.25,2952.75,2095.048909,Prilly - Route de Renens\nPassage piétons [16278],130811,Centre Intercommunal de Glace de Malley SA [130811]
1,13414,201443,2014,43,1304.5,330.232953,1355.905038,969.973317,Heimberg - Gemeindegebiet (öffentlicher Grund) [13414],103797,Einwohnergemeinde Heimberg [103797]
2,24975,201533,2015,33,2407.0,2407.0,2524.637363,2450.832005,"Zug, Baudirektion des Kantons Zug, Plakatvertrag\n [24975]",105637,Baudirektion des Kantons Zug [105637]
3,11565,201628,2016,28,0.0,0.0,1294.0,1292.6,"div. Stellen Kernzone, Bern [11565]",144093,Tiefbauamt der Stadt Bern [144093]
4,1151,201644,2016,44,0.0,0.0,192.0,66.883729,"Hauptstrasse 139, 5615 Fahrwangen [1151]",106465,Rodel Walter [106465]


## Zeilen mit AUS_NETTO <= 0 ausfiltern und sortieren

*Hinweis: Geschäftsjahr und -woche für Aushang und Reservation werden in diesem Abschnitt nicht berechnet.*

In [39]:
# Keep only rows with a strictly positive AUS_NETTO, order them by week
# (JAHR_KW) and then by contract number, and renumber the index from 0.
pv_data = (
    pv_data_raw
    .loc[lambda frame: frame['AUS_NETTO'] > 0]
    .sort_values(by=['JAHR_KW', 'PV_NR'])
    .reset_index(drop=True)
)

In [40]:
# Detailed per-column overview of the filtered and sorted frame.
desc_col(pv_data, det=True)

Unnamed: 0,DTYPE,NULLS,UNIQUE,MEM,RANGE
PV_NR,int64,0/845123,4207,6.4 MB,"[28,307931]"
JAHR_KW,int64,0/845123,383,6.4 MB,"[201401,202117]"
JAHR,category,0/845123,8,826.2 KB,"[2014,2021]"
KW,category,0/845123,53,830.9 KB,"[01,53]"
RES_BRUTTO,float64,0/845123,58542,6.4 MB,"[0.0,1818913.5130294955]"
RES_NETTO,float64,0/845123,167049,6.4 MB,"[-7.14216149067,1107719.8292558126]"
AUS_BRUTTO,float64,0/845123,142836,6.4 MB,"[1.0,429794.512849719]"
AUS_NETTO,float64,0/845123,325609,6.4 MB,"[0.263904606228,285173.55827892455]"
PV_TITEL,category,0/845123,4207,2.3 MB,"[ 1800 Vevey - Rue du Panorama 12 [24493],öffentlicher Grund [12466]]"
PARTNER_NR,int64,0/845123,3174,6.4 MB,"[100035,655161]"


## Vertragsinformationen extrahieren

In [41]:
# Group the rows by contract number; with as_index=True, PV_NR becomes the
# index of any aggregated result.
pv_idx = pv_data.groupby('PV_NR', as_index=True)

In [42]:
# Collapse the grouped rows into one summary row per contract (PV_NR):
# first title, summed revenue columns, partner taken from the last row of each
# group, and the smallest / largest JAHR_KW.
pv_info = pv_idx.agg({
    'PV_TITEL': 'first',
    'RES_BRUTTO': 'sum',
    'RES_NETTO': 'sum',
    'AUS_BRUTTO': 'sum',
    'AUS_NETTO': 'sum',
    'PARTNER_NR': 'last',
    'PARTNER': 'last',
    'JAHR_KW': ['min', 'max'],
})
# The list of functions for JAHR_KW makes agg() return two-level column labels;
# replace them with flat names, listed in the same order as the aggregations above.
# An explicit list is used instead of splitting a string on whitespace, which had
# let a stray comma end up inside the 'totalResBrutto' column name.
pv_info.columns = [
    'Titel',
    'totalResBrutto',
    'totalResNetto',
    'totalAusBrutto',
    'totalAusNetto',
    'partnerNr',
    'Partner',
    'firstKw',
    'lastKw',
]

In [43]:
# Detailed per-column overview of the per-contract summary.
desc_col(pv_info, det=True)

Unnamed: 0,DTYPE,NULLS,UNIQUE,MEM,RANGE
Titel,object,0/4207,4207,546.2 KB,"[ 1800 Vevey - Rue du Panorama 12 [24493],öffentlicher Grund [12466]]"
"totalResBrutto,",float64,0/4207,4066,65.7 KB,"[0.0,38970086.93807321]"
totalResNetto,float64,0/4207,4080,65.7 KB,"[0.0,23869277.3785922]"
totalAusBrutto,float64,0/4207,4119,65.7 KB,"[47.0,39530357.74333258]"
totalAusNetto,float64,0/4207,4121,65.7 KB,"[39.95,24250531.60974595]"
partnerNr,int64,0/4207,3174,65.7 KB,"[100035,655161]"
Partner,object,0/4207,3174,442.8 KB,"[19gale GmbH [521739],zb Zentralbahn AG [464509]]"
firstKw,int64,0/4207,70,65.7 KB,"[201401,201845]"
lastKw,int64,0/4207,287,65.7 KB,"[201405,202117]"


In [44]:
# Show the per-contract summary as an interactive qgrid widget.
qgrid.show_grid(pv_info)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

#### Jahres-Nettoumsätze

In [45]:
def _yearly_sums(data, value_col):
    """Sum `value_col` per (PV_NR, JAHR) pair that actually occurs in `data`.

    Returns a flat frame with the columns PV_NR, JAHR and `value_col`.
    """
    grouped = data.groupby(['PV_NR', 'JAHR'], observed=True, as_index=False)
    return grouped[[value_col]].agg('sum')


def _years_as_columns(yearly, value_col, prefix):
    """Spread yearly sums into one column per JAHR, indexed by PV_NR.

    Contract/year combinations without a value are filled with 0, and every
    resulting column label gets `prefix` prepended.
    """
    wide = yearly.pivot(index='PV_NR', columns='JAHR', values=value_col)
    return wide.fillna(0).add_prefix(prefix)


# Long format: one row per contract and year.
pvYearANetto = _yearly_sums(pv_data, 'AUS_NETTO')
pvYearRNetto = _yearly_sums(pv_data, 'RES_NETTO')
# Wide format: one row per contract, one column per year.
pvANetto = _years_as_columns(pvYearANetto, 'AUS_NETTO', 'Netto_Aus_')
pvRNetto = _years_as_columns(pvYearRNetto, 'RES_NETTO', 'Netto_Res_')

In [46]:
# Attach the per-year AUS_NETTO and RES_NETTO columns to the contract summary,
# matching on PV_NR (merge defaults to an inner join).
# NOTE(review): pv_info is reassigned from itself, so re-running this cell
# without re-running the aggregation merges the year columns a second time
# (pandas then suffixes the clashing column names) — confirm before re-executing.
pv_info = pv_info.merge(pvANetto, on='PV_NR').merge(pvRNetto, on='PV_NR')

## Daten speichern

In [47]:
# Persist both result frames with store_bin, each under its own feather file name.
for frame, file_name in ((pv_data, 'pv_data.feather'), (pv_info, 'pv_info.feather')):
    store_bin(frame, file_name)

2019-05-20 13:24:11 [INFO] Writing to file /home/pa/data/pv_data.feather
2019-05-20 13:24:11 [INFO] Written 50.3 MB
2019-05-20 13:24:11 [INFO] Finished storing binary file in 0.12s (0.89s CPU)
2019-05-20 13:24:11 [INFO] Writing to file /home/pa/data/pv_info.feather
2019-05-20 13:24:11 [INFO] Written 1.1 MB
2019-05-20 13:24:11 [INFO] Finished storing binary file in 0.02s (0.12s CPU)
