# Partnervertrag Analyse: Datenpräparation

In [1]:
%load_ext autoreload
%autoreload

import pandas as pd
import qgrid

from pa_lib.file  import store_bin
from pa_lib.data  import desc_col, as_dtype, as_date, split_date_iso
from pa_lib.util  import obj_size
from pa_lib.types import dtFactor
from pa_lib.sql   import query
from pa_lib.ora   import Connection
from pa_lib.log   import info

# Show long text columns in full (up to 200 characters) instead of truncating them.
pd.options.display.max_colwidth = 200

## Daten einlesen

In [2]:
# Look up the query named 'pv' through pa_lib.sql.query; the result is what gets
# sent to the database in the next cell.
# NOTE(review): presumably this is the SQL text of the partner contract extract - confirm in pa_lib.
pv_query = query('pv')

In [3]:
# Run the PV query on the APC Prod instance and log start and result size/shape.
# The connection is opened as a context manager and only used for this one query.
info('Starting PV query on APC Prod instance')
with Connection('APC_PROD_VDWH1') as conn:
    pv_data_raw = conn.long_query(pv_query)
info(f'Finished PV query, returned {obj_size(pv_data_raw)} of data: {pv_data_raw.shape}')

2019-05-15 15:26:59 [INFO] Starting PV query on APC Prod instance
2019-05-15 15:28:10 [INFO] Finished query in 71.41s (8.54s CPU)
2019-05-15 15:28:12 [INFO] Finished PV query, returned 232.3 MB of data: (638265, 11)


In [4]:
# Preview the first rows of the raw query result.
pv_data_raw.head()

Unnamed: 0,PV_NR,JAHR_KW,JAHR,KW,RES_BRUTTO,RES_NETTO,AUS_BRUTTO,AUS_NETTO,PV_TITEL,PARTNER_NR,PARTNER
0,6343,201403,2014,3,2252.0,1607.575,3111.807692,1551.038741,Brig-Glis - Gemeindevertrag [6343],121149,Stadtgemeinde Brig-Glis [121149]
1,31591,201704,2017,4,5679.5,3371.635614,11138.653846,5038.083059,"Dietikon, Rahmenvertrag Unterhaltsbezirk 3 [31591]",100988,Baudirektion Kanton Zürich [100988]
2,3547,201430,2014,30,19707.0,9300.944846,16007.472527,8608.231172,Sion - Convention d'affichage [3547],118107,Municipalité de Sion [118107]
3,20896,201418,2014,18,11613.0,7567.45992,12694.883145,7817.615358,Vevey - Convention d'affichage [20896],122045,Ville de Vevey [122045]
4,309650,201405,2014,5,302.0,119.768151,151.0,58.44674,"Olten, Sportstrasse 89 +91 [309650]",500110,Eishockey Club Olten AG EHCO [500110]


In [6]:
# Per-column overview of the raw data: dtype, null count and number of unique values.
desc_col(pv_data_raw)

Unnamed: 0,DTYPE,NULLS,UNIQUE
PV_NR,int64,0/638265,7818
JAHR_KW,int64,0/638265,280
JAHR,object,0/638265,6
KW,object,0/638265,53
RES_BRUTTO,float64,0/638265,87294
RES_NETTO,float64,0/638265,221436
AUS_BRUTTO,float64,0/638265,160871
AUS_NETTO,float64,0/638265,307174
PV_TITEL,object,0/638265,7818
PARTNER_NR,int64,0/638265,5753


## Leerwerte bereinigen, Datentypen korrigieren

In [7]:
# Drop every row that has a missing value in any column (overwrites pv_data_raw).
pv_data_raw = pv_data_raw.dropna(how='any')

In [8]:
# Memory footprint and shape after dropping rows with missing values.
(obj_size(pv_data_raw), pv_data_raw.shape)

('254.5 MB', (638265, 11))

In [9]:
# Convert the object-dtype columns to dtFactor to save memory.
# Calling as_dtype directly is the same as going through DataFrame.pipe.
pv_data_raw = as_dtype(pv_data_raw, dtFactor, incl_dtype='object')

In [10]:
# Memory footprint and shape after the dtype conversion; the shape should be unchanged.
(obj_size(pv_data_raw), pv_data_raw.shape)

('44.6 MB', (638265, 11))

In [11]:
# Detailed per-column overview (adds memory use and value range) to verify the new dtypes.
desc_col(pv_data_raw, det=True)

Unnamed: 0,DTYPE,NULLS,UNIQUE,MEM,RANGE
PV_NR,int64,0/638265,7818,9.7 MB,"[26,311495]"
JAHR_KW,int64,0/638265,280,9.7 MB,"[201401,201919]"
JAHR,category,0/638265,6,5.5 MB,"[2014,2019]"
KW,category,0/638265,53,5.5 MB,"[01,53]"
RES_BRUTTO,float64,0/638265,87294,9.7 MB,"[3.55,4089637.83883066]"
RES_NETTO,float64,0/638265,221436,9.7 MB,"[-25.599412008256,3221578.3203589036]"
AUS_BRUTTO,float64,0/638265,160871,9.7 MB,"[0.692307692308,1577148.971208175]"
AUS_NETTO,float64,0/638265,307174,9.7 MB,"[-1.185886821269,940441.8088653579]"
PV_TITEL,category,0/638265,7818,7.4 MB,"[ Bern - Bethlehemstrasse 24 [26973],zb Zentralbahn AG [32060]]"
PARTNER_NR,int64,0/638265,5753,9.7 MB,"[100035,655489]"


In [13]:
# Preview the first rows again after null handling and dtype conversion.
pv_data_raw.head()

Unnamed: 0,PV_NR,JAHR_KW,JAHR,KW,RES_BRUTTO,RES_NETTO,AUS_BRUTTO,AUS_NETTO,PV_TITEL,PARTNER_NR,PARTNER
0,6343,201403,2014,3,2252.0,1607.575,3111.807692,1551.038741,Brig-Glis - Gemeindevertrag [6343],121149,Stadtgemeinde Brig-Glis [121149]
1,31591,201704,2017,4,5679.5,3371.635614,11138.653846,5038.083059,"Dietikon, Rahmenvertrag Unterhaltsbezirk 3 [31591]",100988,Baudirektion Kanton Zürich [100988]
2,3547,201430,2014,30,19707.0,9300.944846,16007.472527,8608.231172,Sion - Convention d'affichage [3547],118107,Municipalité de Sion [118107]
3,20896,201418,2014,18,11613.0,7567.45992,12694.883145,7817.615358,Vevey - Convention d'affichage [20896],122045,Ville de Vevey [122045]
4,309650,201405,2014,5,302.0,119.768151,151.0,58.44674,"Olten, Sportstrasse 89 +91 [309650]",500110,Eishockey Club Olten AG EHCO [500110]


## Aushang-Netto <= 0 ausfiltern, sortieren (TODO: Geschäftsjahr und -woche für Aushang und Reservation berechnen)

In [14]:
# Keep only rows with a positive AUS_NETTO (zero and negative values are dropped),
# then order by JAHR_KW and PV_NR and renumber the index from 0.
has_positive_net = pv_data_raw['AUS_NETTO'] > 0
pv_data = pv_data_raw.loc[has_positive_net].sort_values(['JAHR_KW', 'PV_NR']).reset_index(drop=True)

In [15]:
# Detailed per-column overview of the filtered and sorted data.
desc_col(pv_data, det=True)

Unnamed: 0,DTYPE,NULLS,UNIQUE,MEM,RANGE
PV_NR,int64,0/626521,7761,4.8 MB,"[26,311493]"
JAHR_KW,int64,0/626521,280,4.8 MB,"[201401,201919]"
JAHR,category,0/626521,6,612.4 KB,"[2014,2019]"
KW,category,0/626521,53,617.5 KB,"[01,53]"
RES_BRUTTO,float64,0/626521,86638,4.8 MB,"[3.55,4089637.83883066]"
RES_NETTO,float64,0/626521,219593,4.8 MB,"[-25.599412008256,3221578.3203589036]"
AUS_BRUTTO,float64,0/626521,160613,4.8 MB,"[0.692307692308,1577148.971208175]"
AUS_NETTO,float64,0/626521,307171,4.8 MB,"[0.512406219875,940441.8088653579]"
PV_TITEL,category,0/626521,7761,2.5 MB,"[ Bern - Bethlehemstrasse 24 [26973],zb Zentralbahn AG [32060]]"
PARTNER_NR,int64,0/626521,5724,4.8 MB,"[100035,655489]"


## Vertragsinformationen extrahieren

In [23]:
# Group the filtered rows by PV_NR; with as_index=True, PV_NR becomes the index
# of anything aggregated from this grouping.
pv_idx = pv_data.groupby('PV_NR', as_index=True)

In [24]:
# Aggregate one row per PV_NR: title, revenue totals, partner, and the first and
# last JAHR_KW present for that contract.
pv_info = pv_idx.agg({
    'PV_TITEL': 'first',
    'RES_BRUTTO': 'sum',
    'RES_NETTO': 'sum',
    'AUS_BRUTTO': 'sum',
    'AUS_NETTO': 'sum',
    'PARTNER_NR': 'last',
    'PARTNER': 'last',
    'JAHR_KW': ['min', 'max'],
})
# The list of functions for JAHR_KW makes agg() return two-level column labels;
# replace them with flat names, in the same order as the dict above.
# Names are spelled out as a list: the earlier whitespace-split string carried a
# stray comma into the column name 'totalResBrutto,'.
pv_info.columns = [
    'Titel',
    'totalResBrutto',
    'totalResNetto',
    'totalAusBrutto',
    'totalAusNetto',
    'partnerNr',
    'Partner',
    'firstKw',
    'lastKw',
]

In [25]:
# Detailed per-column overview of the per-contract summary.
desc_col(pv_info, det=True)

Unnamed: 0,DTYPE,NULLS,UNIQUE,MEM,RANGE
Titel,object,0/7761,7761,1.1 MB,"[ Bern - Bethlehemstrasse 24 [26973],zb Zentralbahn AG [32060]]"
"totalResBrutto,",float64,0/7761,7691,121.3 KB,"[21.571428571429,201559729.9001885]"
totalResNetto,float64,0/7761,7739,121.3 KB,"[0.0,129567091.8999057]"
totalAusBrutto,float64,0/7761,7695,121.3 KB,"[4.153846153848,212121311.80940267]"
totalAusNetto,float64,0/7761,7752,121.3 KB,"[4.153846153848,137795544.8679387]"
partnerNr,int64,0/7761,5724,121.3 KB,"[100035,655489]"
Partner,object,0/7761,5724,814.1 KB,"[""Zürich"" Versicherungs-Gesellschaft [495776],Özdemir Ökkes [614921]]"
firstKw,int64,0/7761,259,121.3 KB,"[201401,201919]"
lastKw,int64,0/7761,262,121.3 KB,"[201401,201919]"


In [26]:
# Show pv_info in an interactive qgrid widget for manual inspection.
qgrid.show_grid(pv_info)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

#### Jahres-Nettoumsätze

In [20]:
def _net_sum_per_year(value_col):
    """Sum `value_col` of pv_data per PV_NR and JAHR, keeping both keys as columns."""
    grouped = pv_data.groupby(['PV_NR', 'JAHR'], observed=True, as_index=False)
    return grouped[[value_col]].agg('sum')


def _years_as_columns(yearly, value_col, prefix):
    """Pivot yearly sums to one row per PV_NR and one prefixed column per JAHR; gaps become 0."""
    wide = yearly.pivot(index='PV_NR', columns='JAHR', values=value_col)
    return wide.fillna(0).add_prefix(prefix)


# Yearly net sums in long format, one frame for AUS_NETTO and one for RES_NETTO.
pvYearANetto = _net_sum_per_year('AUS_NETTO')
pvYearRNetto = _net_sum_per_year('RES_NETTO')

# Wide format: one 'Netto_Aus_<year>' / 'Netto_Res_<year>' column per year.
pvANetto = _years_as_columns(pvYearANetto, 'AUS_NETTO', 'Netto_Aus_')
pvRNetto = _years_as_columns(pvYearRNetto, 'RES_NETTO', 'Netto_Res_')

In [21]:
# Attach the yearly net-revenue columns to the per-contract summary, matching on PV_NR.
# Year columns left over from an earlier run of this cell are dropped first, so a
# re-run replaces them instead of adding '_x'/'_y'-suffixed duplicates. On a fresh
# pv_info the drop is a no-op.
yearly_cols = [*pvANetto.columns, *pvRNetto.columns]
pv_info = (pv_info.drop(columns=yearly_cols, errors='ignore')
           .merge(pvANetto, on='PV_NR')
           .merge(pvRNetto, on='PV_NR'))

## Daten speichern

In [22]:
# Persist the weekly data first, then the per-contract summary, as feather files.
for frame, file_name in ((pv_data, 'pv_data.feather'), (pv_info, 'pv_info.feather')):
    store_bin(frame, file_name)

2019-05-15 16:25:05 [INFO] Writing to file /home/pa/data/pv_data.feather
2019-05-15 16:25:05 [INFO] Written 37.6 MB
2019-05-15 16:25:05 [INFO] Finished storing binary file in 0.1s (0.68s CPU)
2019-05-15 16:25:05 [INFO] Writing to file /home/pa/data/pv_info.feather
2019-05-15 16:25:05 [INFO] Written 1.8 MB
2019-05-15 16:25:05 [INFO] Finished storing binary file in 0.02s (0.13s CPU)
