## Dependencies

In [1]:
import pandas as pd
import numpy as np
import os
import glob

import matplotlib.pyplot as plt
# Notebook-wide display defaults: DataFrames may show up to 500 columns on
# output lines up to 1000 characters wide; figures default to 15 x 5 inches.
pd.options.display.max_columns = 500
pd.options.display.width = 1000
plt.rcParams['figure.figsize'] = (15, 5)


In [2]:
# Input tree: everything is read from <parent directory>/indata.
indataPath = os.path.join(os.pardir, "indata")
superPath = os.path.join(indataPath, "supermappen")

# DDF dataset folders nested inside the input tree.
ddfRootPath = os.path.join(indataPath, "ddf--sodertornsmodellen")
ddfSrcPath = os.path.join(indataPath, "ddf--sodertornsmodellen", "ddf--sodertornsmodellen--src")

# Destination folder for the datapoint CSV files written at the end of the notebook.
ddfOutputPath = os.path.join(
    os.pardir,
    'ddf--sodertornsmodellen-output',
    'ddf--sodertornsmodellen--src',
)

## Helpers

In [3]:
from helpers import (readMaster, appendNewDatapoints, byGender, baskod2010tobasomrade, plotcombined)

## Läs in MASTER Excel-filen

In [4]:
# Load the MASTER table through the project helper. Later cells index the
# result with a list of column names, i.e. it is used as a pandas DataFrame.
# NOTE(review): which file/sheet is read is decided inside readMaster() and is
# not visible from this notebook — confirm in helpers.
master = readMaster()

## Kombinera `.csv` med MASTER 

In [13]:
# Read the tenure-form table (Hyresrätt / Bostadsrätt / Äganderätt counts per
# Basomrade). The file is semicolon-separated, Latin-1 encoded and uses a
# decimal comma. The path is built from `superPath` so it follows the
# notebook's path configuration instead of repeating a hard-coded relative path.
csv = pd.read_csv(
    os.path.join(superPath, 'Tabell2A_FB7bas.csv'),
    encoding='latin1',
    sep=';',
    decimal=',',
)

# Stamp every row with 2017 as a timestamp, so `year` has the same kind of
# value that the MASTER rows are filtered on further down.
csv['year'] = pd.to_datetime('2017')

# Rename to the column names used for the MASTER subset, so that both frames
# line up column-by-column when they are concatenated.
csv = csv.rename(columns={
    "Basomrade": "BASKOD2010",
    "Hyresrätt": "antal_hyres",
    "Bostadsrätt": "antal_bostadsr",
    "Äganderätt": "antal_aegender",
    "Uppgift_saknas": "Lght_Uppgift saknas",
    "TOTALT": "Lght total"
})

# Pull the area code, year and dwelling-count columns out of MASTER and give
# the three tenure counts their short `antal_*` names; the other columns keep
# the names they have in MASTER.
subsetmas = master[[
    'BASKODER',
    'year',
    'Lght_Hyresrätt',
    'Lght_Bostadsrätt',
    'Lght_Äganderätt',
    'Lght_Uppgift saknas',
    'Lght total',
]].rename(columns={
    "Lght_Hyresrätt": "antal_hyres",
    "Lght_Bostadsrätt": "antal_bostadsr",
    "Lght_Äganderätt": "antal_aegender",
})

In [14]:
# Keep the MASTER rows dated 2012-01-01 or later and expose their area code
# under the name BASKOD2010, matching the renamed CSV frame.
mas2012 = (
    subsetmas
    .loc[subsetmas['year'] >= pd.to_datetime('2012')]
    .rename(columns={'BASKODER': 'BASKOD2010'})
)

In [15]:
# Stack the 2017 CSV rows on top of the MASTER rows from 2012 onward.
# sort=False keeps the columns in the order they are encountered rather than
# sorting the column names.
# NOTE(review): both inputs keep their own row index, so index labels may
# repeat in the result — confirm the helper applied next does not rely on a
# unique index.
concat = pd.concat([csv, mas2012], sort=False)

## Summera efter år och Basområde2000-kod

In [30]:
# Pass the BASKOD2010-keyed rows through the project helper.
# NOTE(review): the helper's logic is not visible here; the following cell
# reads 'BASKOD2000' and 'basomrade' from its result, so it presumably adds
# those columns — confirm in helpers.
merged = baskod2010tobasomrade(concat)

In [20]:
# Sum the dwelling counts per year and basomrade.
# Only the two grouping keys and the count columns are selected: BASKOD2000 is
# an identifier, not a measure, and leaving it in the frame would make .sum()
# add up (or, for text codes, concatenate) the codes themselves.
subset = merged[['year', 'basomrade', 'antal_hyres', 'antal_bostadsr', 'antal_aegender', 'Lght total']]
# groupby(...).sum() already returns a DataFrame; reset_index() turns the
# (year, basomrade) group keys back into ordinary columns.
summed = subset.groupby(['year', 'basomrade']).sum().reset_index()

In [21]:
# Replace each timestamp in `year` with its calendar year (e.g. 2017).
# getattr(..., 'year', t) leaves values that have no `.year` attribute
# untouched, so running this cell again on the already-converted integers is a
# no-op instead of an AttributeError.
summed['year'] = summed['year'].apply(lambda t: getattr(t, 'year', t))

In [22]:
# Drop rows that lack data (a NaN in any column removes the row).
# NOTE(review): `summed` comes from a groupby .sum() called with default
# arguments; with pandas' default min_count=0 a group that contains only
# missing values sums to 0 rather than NaN, so this may remove fewer rows
# than intended — confirm.
summed = summed.dropna(how='any')

In [23]:
# Sanity check: total number of dwellings ('Lght total') per year.
summed.groupby('year')[['Lght total']].sum()

Unnamed: 0_level_0,Lght total
year,Unnamed: 1_level_1
2012,943331.0
2013,950892.0
2014,959773.0
2015,971836.0
2016,990371.0
2017,1028961.0


## Räkna andelar och exportera

#### Andel hyresrätter

In [24]:
# Share of hyresrätt dwellings among all dwellings, rounded to 4 decimals and
# stored as text: an exact '0.0' / '1.0' is shortened to '0' / '1', and a
# missing share ('nan') becomes an empty string.
summed['andel_hyres'] = (
    (summed['antal_hyres'] / summed['Lght total'])
    .round(4)
    .astype(str)
    .replace({'0.0': '0', '1.0': '1', 'nan': ''})
)

In [25]:
# Write the basomrade / year / andel_hyres datapoints to the output folder,
# leaving out the DataFrame index.
summed.loc[:, ['basomrade', 'year', 'andel_hyres']].to_csv(
    os.path.join(ddfOutputPath, 'ddf--datapoints--andel_hyres--by--basomrade--year.csv'),
    index=False,
)

#### Andel bostadsrätter

In [26]:
# Share of bostadsrätt dwellings among all dwellings, rounded to 4 decimals
# and stored as text: an exact '0.0' / '1.0' is shortened to '0' / '1', and a
# missing share ('nan') becomes an empty string.
summed['andel_bostadsr'] = (
    (summed['antal_bostadsr'] / summed['Lght total'])
    .round(4)
    .astype(str)
    .replace({'0.0': '0', '1.0': '1', 'nan': ''})
)

In [27]:
# Write the basomrade / year / andel_bostadsr datapoints to the output folder,
# leaving out the DataFrame index.
summed.loc[:, ['basomrade', 'year', 'andel_bostadsr']].to_csv(
    os.path.join(ddfOutputPath, 'ddf--datapoints--andel_bostadsr--by--basomrade--year.csv'),
    index=False,
)

#### Andel äganderätter

In [28]:
# Share of äganderätt dwellings among all dwellings, rounded to 4 decimals
# and stored as text: an exact '0.0' / '1.0' is shortened to '0' / '1', and a
# missing share ('nan') becomes an empty string.
summed['andel_aegender'] = (
    (summed['antal_aegender'] / summed['Lght total'])
    .round(4)
    .astype(str)
    .replace({'0.0': '0', '1.0': '1', 'nan': ''})
)

In [29]:
# Write the basomrade / year / andel_aegender datapoints to the output folder,
# leaving out the DataFrame index.
summed.loc[:, ['basomrade', 'year', 'andel_aegender']].to_csv(
    os.path.join(ddfOutputPath, 'ddf--datapoints--andel_aegender--by--basomrade--year.csv'),
    index=False,
)